Demonstrates date shifting of a CSV file.
Explore further
For detailed documentation that includes this code sample, see the following:
Code sample
C#
To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries .
To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment .
using
System
;
using
System.IO
;
using
System.Linq
;
using
Google.Api.Gax.ResourceNames
;
using
Google.Cloud.Dlp.V2
;
using
Google.Protobuf
;
public class DeidentifyWithDateShift
{
    /// <summary>
    /// De-identifies dates in a CSV file by shifting each one a pseudorandom
    /// number of days within [-lowerBoundDays, +upperBoundDays].
    /// </summary>
    /// <param name="projectId">The Google Cloud project to bill the request to.</param>
    /// <param name="inputCsvFilePath">Path of the CSV file to read; the first line must be the header row.</param>
    /// <param name="lowerBoundDays">Maximum number of days to shift a date backward.</param>
    /// <param name="upperBoundDays">Maximum number of days to shift a date forward.</param>
    /// <param name="dateFields">Comma-separated names of the columns to date-shift.</param>
    /// <param name="contextField">(Optional) Column whose value seeds the shift so rows with the same context shift consistently.</param>
    /// <param name="keyName">(Optional) Cloud KMS key that wraps the AES-256 key.</param>
    /// <param name="wrappedKey">(Optional) Base64-encoded AES-256 key, wrapped by <paramref name="keyName"/>.</param>
    /// <returns>The de-identified content returned by the DLP service.</returns>
    /// <exception cref="ArgumentException">
    /// Thrown unless ALL or NONE of contextField, keyName and wrappedKey are supplied.
    /// </exception>
    public static DeidentifyContentResponse Deidentify(
        string projectId,
        string inputCsvFilePath,
        int lowerBoundDays,
        int upperBoundDays,
        string dateFields,
        string contextField,
        string keyName,
        string wrappedKey)
    {
        var hasKeyName = !string.IsNullOrEmpty(keyName);
        var hasWrappedKey = !string.IsNullOrEmpty(wrappedKey);
        var hasContext = !string.IsNullOrEmpty(contextField);
        bool allFieldsSet = hasKeyName && hasWrappedKey && hasContext;
        bool noFieldsSet = !hasKeyName && !hasWrappedKey && !hasContext;
        if (!(allFieldsSet || noFieldsSet))
        {
            throw new ArgumentException("Must specify ALL or NONE of: {contextFieldId, keyName, wrappedKey}!");
        }

        var dlp = DlpServiceClient.Create();

        // Read file
        var csvLines = File.ReadAllLines(inputCsvFilePath);
        var csvHeaders = csvLines[0].Split(',');
        var csvRows = csvLines.Skip(1).ToArray();

        // Convert dates to protobuf format, and everything else to a string
        var protoHeaders = csvHeaders.Select(header => new FieldId
        {
            Name = header
        });
        var protoRows = csvRows.Select(csvRow =>
        {
            var rowValues = csvRow.Split(',');
            var protoValues = rowValues.Select(rowValue =>
                System.DateTime.TryParse(rowValue, out var parsedDate)
                    ? new Value { DateValue = Google.Type.Date.FromDateTime(parsedDate) }
                    : new Value { StringValue = rowValue });
            var rowObject = new Table.Types.Row();
            rowObject.Values.Add(protoValues);
            return rowObject;
        });

        var dateFieldList = dateFields
            .Split(',')
            .Select(field => new FieldId { Name = field });

        // Construct + execute the request
        var dateShiftConfig = new DateShiftConfig
        {
            LowerBoundDays = lowerBoundDays,
            UpperBoundDays = upperBoundDays
        };

        // BUG FIX: previously Context and CryptoKey were assigned unconditionally,
        // which throws when the caller supplied NONE of the optional fields
        // (proto string fields reject null; ByteString.FromBase64(null) throws).
        // Only attach them when the ALL-fields branch of the validation holds.
        if (allFieldsSet)
        {
            dateShiftConfig.Context = new FieldId { Name = contextField };
            dateShiftConfig.CryptoKey = new CryptoKey
            {
                KmsWrapped = new KmsWrappedCryptoKey
                {
                    WrappedKey = ByteString.FromBase64(wrappedKey),
                    CryptoKeyName = keyName
                }
            };
        }

        var deidConfig = new DeidentifyConfig
        {
            RecordTransformations = new RecordTransformations
            {
                FieldTransformations =
                {
                    new FieldTransformation
                    {
                        PrimitiveTransformation = new PrimitiveTransformation
                        {
                            DateShiftConfig = dateShiftConfig
                        },
                        Fields = { dateFieldList }
                    }
                }
            }
        };

        var response = dlp.DeidentifyContent(new DeidentifyContentRequest
        {
            Parent = new LocationName(projectId, "global").ToString(),
            DeidentifyConfig = deidConfig,
            Item = new ContentItem
            {
                Table = new Table
                {
                    Headers = { protoHeaders },
                    Rows = { protoRows }
                }
            }
        });

        return response;
    }
}
Go
To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries .
To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment .
import
(
"context"
"fmt"
"io"
dlp
"cloud.google.com/go/dlp/apiv2"
"cloud.google.com/go/dlp/apiv2/dlppb"
)
// deidentifyDateShift shifts dates found in the input between lowerBoundDays and
// upperBoundDays.
func
deidentifyDateShift
(
w
io
.
Writer
,
projectID
string
,
lowerBoundDays
,
upperBoundDays
int32
,
input
string
)
error
{
// projectID := "my-project-id"
// lowerBoundDays := -1
// upperBound := -1
// input := "2016-01-10"
// Will print "2016-01-09"
ctx
:=
context
.
Background
()
client
,
err
:=
dlp
.
NewClient
(
ctx
)
if
err
!=
nil
{
return
fmt
.
Errorf
(
"dlp.NewClient: %w"
,
err
)
}
defer
client
.
Close
()
// Create a configured request.
req
:=
& dlppb
.
DeidentifyContentRequest
{
Parent
:
fmt
.
Sprintf
(
"projects/%s/locations/global"
,
projectID
),
DeidentifyConfig
:
& dlppb
.
DeidentifyConfig
{
Transformation
:
& dlppb
.
DeidentifyConfig_InfoTypeTransformations
{
InfoTypeTransformations
:
& dlppb
.
InfoTypeTransformations
{
Transformations
:
[]
*
dlppb
.
InfoTypeTransformations_InfoTypeTransformation
{
{
InfoTypes
:
[]
*
dlppb
.
InfoType
{},
// Match all info types.
PrimitiveTransformation
:
& dlppb
.
PrimitiveTransformation
{
Transformation
:
& dlppb
.
PrimitiveTransformation_DateShiftConfig
{
DateShiftConfig
:
& dlppb
.
DateShiftConfig
{
LowerBoundDays
:
lowerBoundDays
,
UpperBoundDays
:
upperBoundDays
,
},
},
},
},
},
},
},
},
// The InspectConfig is used to identify the DATE fields.
InspectConfig
:
& dlppb
.
InspectConfig
{
InfoTypes
:
[]
*
dlppb
.
InfoType
{
{
Name
:
"DATE"
,
},
},
},
// The item to analyze.
Item
:
& dlppb
.
ContentItem
{
DataItem
:
& dlppb
.
ContentItem_Value
{
Value
:
input
,
},
},
}
// Send the request.
r
,
err
:=
client
.
DeidentifyContent
(
ctx
,
req
)
if
err
!=
nil
{
return
fmt
.
Errorf
(
"DeidentifyContent: %w"
,
err
)
}
// Print the result.
fmt
.
Fprint
(
w
,
r
.
GetItem
().
GetValue
())
return
nil
}
Java
To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries .
To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment .
import
com.google.cloud.dlp.v2. DlpServiceClient
;
import
com.google.common.base.Splitter
;
import
com.google.privacy.dlp.v2. ContentItem
;
import
com.google.privacy.dlp.v2. DateShiftConfig
;
import
com.google.privacy.dlp.v2. DeidentifyConfig
;
import
com.google.privacy.dlp.v2. DeidentifyContentRequest
;
import
com.google.privacy.dlp.v2. DeidentifyContentResponse
;
import
com.google.privacy.dlp.v2. FieldId
;
import
com.google.privacy.dlp.v2. FieldTransformation
;
import
com.google.privacy.dlp.v2. LocationName
;
import
com.google.privacy.dlp.v2. PrimitiveTransformation
;
import
com.google.privacy.dlp.v2. RecordTransformations
;
import
com.google.privacy.dlp.v2. Table
;
import
com.google.privacy.dlp.v2. Value
;
import
com.google.type. Date
;
import
java.io.BufferedReader
;
import
java.io.BufferedWriter
;
import
java.io.IOException
;
import
java.nio.file.Files
;
import
java.nio.file.Path
;
import
java.nio.file.Paths
;
import
java.time.LocalDate
;
import
java.time.format.DateTimeFormatter
;
import
java.util.Arrays
;
import
java.util.List
;
import
java.util.stream.Collectors
;
public
class
DeIdentifyWithDateShift
{
public
static
void
main
(
String
[]
args
)
throws
Exception
{
// TODO(developer): Replace these variables before running the sample.
String
projectId
=
"your-project-id"
;
Path
inputCsvFile
=
Paths
.
get
(
"path/to/your/input/file.csv"
);
Path
outputCsvFile
=
Paths
.
get
(
"path/to/your/output/file.csv"
);
deIdentifyWithDateShift
(
projectId
,
inputCsvFile
,
outputCsvFile
);
}
public
static
void
deIdentifyWithDateShift
(
String
projectId
,
Path
inputCsvFile
,
Path
outputCsvFile
)
throws
IOException
{
// Initialize client that will be used to send requests. This client only needs to be created
// once, and can be reused for multiple requests. After completing all of your requests, call
// the "close" method on the client to safely clean up any remaining background resources.
try
(
DlpServiceClient
dlp
=
DlpServiceClient
.
create
())
{
// Read the contents of the CSV file into a Table
List<FieldId>
headers
;
List<Table
.
Row
>
rows
;
try
(
BufferedReader
input
=
Files
.
newBufferedReader
(
inputCsvFile
))
{
// Parse and convert the first line into header names
headers
=
Arrays
.
stream
(
input
.
readLine
().
split
(
","
))
.
map
(
header
-
>
FieldId
.
newBuilder
().
setName
(
header
).
build
())
.
collect
(
Collectors
.
toList
());
// Parse the remainder of the file as Table.Rows
rows
=
input
.
lines
().
map
(
DeIdentifyWithDateShift
::
parseLineAsRow
).
collect
(
Collectors
.
toList
());
}
Table
table
=
Table
.
newBuilder
().
addAllHeaders
(
headers
).
addAllRows
(
rows
).
build
();
ContentItem
item
=
ContentItem
.
newBuilder
().
setTable
(
table
).
build
();
// Set the maximum days to shift dates backwards (lower bound) or forward (upper bound)
DateShiftConfig
dateShiftConfig
=
DateShiftConfig
.
newBuilder
().
setLowerBoundDays
(
5
).
setUpperBoundDays
(
5
).
build
();
PrimitiveTransformation
transformation
=
PrimitiveTransformation
.
newBuilder
().
setDateShiftConfig
(
dateShiftConfig
).
build
();
// Specify which fields the DateShift should apply too
List<FieldId>
dateFields
=
Arrays
.
asList
(
headers
.
get
(
1
),
headers
.
get
(
3
));
FieldTransformation
fieldTransformation
=
FieldTransformation
.
newBuilder
()
.
addAllFields
(
dateFields
)
.
setPrimitiveTransformation
(
transformation
)
.
build
();
RecordTransformations
recordTransformations
=
RecordTransformations
.
newBuilder
().
addFieldTransformations
(
fieldTransformation
).
build
();
// Specify the config for the de-identify request
DeidentifyConfig
deidentifyConfig
=
DeidentifyConfig
.
newBuilder
().
setRecordTransformations
(
recordTransformations
).
build
();
// Combine configurations into a request for the service.
DeidentifyContentRequest
request
=
DeidentifyContentRequest
.
newBuilder
()
.
setParent
(
LocationName
.
of
(
projectId
,
"global"
).
toString
())
.
setItem
(
item
)
.
setDeidentifyConfig
(
deidentifyConfig
)
.
build
();
// Send the request and receive response from the service
DeidentifyContentResponse
response
=
dlp
.
deidentifyContent
(
request
);
// Write the results to the target CSV file
try
(
BufferedWriter
writer
=
Files
.
newBufferedWriter
(
outputCsvFile
))
{
Table
outTable
=
response
.
getItem
().
getTable
();
String
headerOut
=
outTable
.
getHeadersList
().
stream
()
.
map
(
FieldId
::
getName
)
.
collect
(
Collectors
.
joining
(
","
));
writer
.
write
(
headerOut
+
"\n"
);
List<String>
rowOutput
=
outTable
.
getRowsList
().
stream
()
.
map
(
row
-
>
joinRow
(
row
.
getValuesList
()))
.
collect
(
Collectors
.
toList
());
for
(
String
line
:
rowOutput
)
{
writer
.
write
(
line
+
"\n"
);
}
System
.
out
.
println
(
"Content written to file: "
+
outputCsvFile
.
toString
());
}
}
}
// Convert the string from the csv file into com.google.type.Date
public
static
Date
parseAsDate
(
String
s
)
{
LocalDate
date
=
LocalDate
.
parse
(
s
,
DateTimeFormatter
.
ofPattern
(
"MM/dd/yyyy"
));
return
Date
.
newBuilder
()
.
setDay
(
date
.
getDayOfMonth
())
.
setMonth
(
date
.
getMonthValue
())
.
setYear
(
date
.
getYear
())
.
build
();
}
// Each row is in the format: Name,BirthDate,CreditCardNumber,RegisterDate
public
static
Table
.
Row
parseLineAsRow
(
String
line
)
{
List<String>
values
=
Splitter
.
on
(
","
).
splitToList
(
line
);
Value
name
=
Value
.
newBuilder
().
setStringValue
(
values
.
get
(
0
)).
build
();
Value
birthDate
=
Value
.
newBuilder
().
setDateValue
(
parseAsDate
(
values
.
get
(
1
))).
build
();
Value
creditCardNumber
=
Value
.
newBuilder
().
setStringValue
(
values
.
get
(
2
)).
build
();
Value
registerDate
=
Value
.
newBuilder
().
setDateValue
(
parseAsDate
(
values
.
get
(
3
))).
build
();
return
Table
.
Row
.
newBuilder
()
.
addValues
(
name
)
.
addValues
(
birthDate
)
.
addValues
(
creditCardNumber
)
.
addValues
(
registerDate
)
.
build
();
}
public
static
String
formatDate
(
Date
d
)
{
return
String
.
format
(
"%s/%s/%s"
,
d
.
getMonth
(),
d
.
getDay
(),
d
.
getYear
());
}
public
static
String
joinRow
(
List<Value>
values
)
{
String
name
=
values
.
get
(
0
).
getStringValue
();
String
birthDate
=
formatDate
(
values
.
get
(
1
).
getDateValue
());
String
creditCardNumber
=
values
.
get
(
2
).
getStringValue
();
String
registerDate
=
formatDate
(
values
.
get
(
3
).
getDateValue
());
return
String
.
join
(
","
,
name
,
birthDate
,
creditCardNumber
,
registerDate
);
}
}
Node.js
To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries .
To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment .
// Imports the Google Cloud Data Loss Prevention library
const DLP = require('@google-cloud/dlp');

// Instantiates a client
const dlp = new DLP.DlpServiceClient();

// Import other required libraries
const fs = require('fs');

// The project ID to run the API call under
// const projectId = 'my-project';

// The path to the CSV file to deidentify
// The first row of the file must specify column names, and all other rows
// must contain valid values
// const inputCsvFile = '/path/to/input/file.csv';

// The path to save the date-shifted CSV file to
// const outputCsvFile = '/path/to/output/file.csv';

// The list of (date) fields in the CSV file to date shift
// const dateFields = [{ name: 'birth_date'}, { name: 'register_date' }];

// The maximum number of days to shift a date backward
// const lowerBoundDays = 1;

// The maximum number of days to shift a date forward
// const upperBoundDays = 1;

// (Optional) The column to determine date shift amount based on
// If this is not specified, a random shift amount will be used for every row
// If this is specified, then 'wrappedKey' and 'keyName' must also be set
// const contextFieldId = [{ name: 'user_id' }];

// (Optional) The name of the Cloud KMS key used to encrypt ('wrap') the AES-256 key
// If this is specified, then 'wrappedKey' and 'contextFieldId' must also be set
// const keyName = 'projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME';

// (Optional) The encrypted ('wrapped') AES-256 key to use when shifting dates
// This key should be encrypted using the Cloud KMS key specified above
// If this is specified, then 'keyName' and 'contextFieldId' must also be set
// const wrappedKey = 'YOUR_ENCRYPTED_AES_256_KEY'

// Helper function for converting CSV rows to Protobuf types
const rowToProto = row => {
  const values = row.split(',');
  const convertedValues = values.map(value => {
    if (Date.parse(value)) {
      const date = new Date(value);
      return {
        dateValue: {
          year: date.getFullYear(),
          month: date.getMonth() + 1,
          day: date.getDate(),
        },
      };
    } else {
      // Convert all non-date values to strings
      return {stringValue: value.toString()};
    }
  });
  return {values: convertedValues};
};

async function deidentifyWithDateShift() {
  // Read and parse a CSV file; keep only lines that contain a comma so a
  // trailing newline or blank line does not produce an empty row.
  const csvLines = fs
    .readFileSync(inputCsvFile)
    .toString()
    .split('\n')
    .filter(line => line.includes(','));
  const csvHeaders = csvLines[0].split(',');
  const csvRows = csvLines.slice(1);

  // Construct the table object
  const tableItem = {
    table: {
      headers: csvHeaders.map(header => {
        return {name: header};
      }),
      rows: csvRows.map(row => rowToProto(row)),
    },
  };

  // Construct DateShiftConfig
  const dateShiftConfig = {
    lowerBoundDays: lowerBoundDays,
    upperBoundDays: upperBoundDays,
  };

  if (contextFieldId && keyName && wrappedKey) {
    dateShiftConfig.context = {name: contextFieldId};
    dateShiftConfig.cryptoKey = {
      kmsWrapped: {
        wrappedKey: wrappedKey,
        cryptoKeyName: keyName,
      },
    };
  } else if (contextFieldId || keyName || wrappedKey) {
    throw new Error(
      'You must set either ALL or NONE of {contextFieldId, keyName, wrappedKey}!'
    );
  }

  // Construct deidentification request
  const request = {
    parent: `projects/${projectId}/locations/global`,
    deidentifyConfig: {
      recordTransformations: {
        fieldTransformations: [
          {
            fields: dateFields,
            primitiveTransformation: {
              dateShiftConfig: dateShiftConfig,
            },
          },
        ],
      },
    },
    item: tableItem,
  };

  // Run deidentification request
  const [response] = await dlp.deidentifyContent(request);
  const tableRows = response.item.table.rows;

  // Write results to a CSV file
  tableRows.forEach((row, rowIndex) => {
    const rowValues = row.values.map(
      value =>
        value.stringValue ||
        `${value.dateValue.month}/${value.dateValue.day}/${value.dateValue.year}`
    );
    csvLines[rowIndex + 1] = rowValues.join(',');
  });
  csvLines.push('');
  fs.writeFileSync(outputCsvFile, csvLines.join('\n'));

  // Print status
  console.log(`Successfully saved date-shift output to ${outputCsvFile}`);
}

deidentifyWithDateShift();
PHP
To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries .
To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment .
use DateTime;
use Exception;
use Google\Cloud\Dlp\V2\Client\DlpServiceClient;
use Google\Cloud\Dlp\V2\ContentItem;
use Google\Cloud\Dlp\V2\CryptoKey;
use Google\Cloud\Dlp\V2\DateShiftConfig;
use Google\Cloud\Dlp\V2\DeidentifyConfig;
use Google\Cloud\Dlp\V2\DeidentifyContentRequest;
use Google\Cloud\Dlp\V2\FieldId;
use Google\Cloud\Dlp\V2\FieldTransformation;
use Google\Cloud\Dlp\V2\KmsWrappedCryptoKey;
use Google\Cloud\Dlp\V2\PrimitiveTransformation;
use Google\Cloud\Dlp\V2\RecordTransformations;
use Google\Cloud\Dlp\V2\Table;
use Google\Cloud\Dlp\V2\Table\Row;
use Google\Cloud\Dlp\V2\Value;
use Google\Type\Date;
/**
 * Deidentify dates in a CSV file by pseudorandomly shifting them.
 * If contextFieldName is not specified, a random shift amount will be used for every row.
 * If contextFieldName is specified, then 'wrappedKey' and 'keyName' must also be set.
 *
 * @param string $callingProjectId The GCP Project ID to run the API call under
 * @param string $inputCsvFile The path to the CSV file to deidentify
 * @param string $outputCsvFile The path to save the date-shifted CSV file to
 * @param string $dateFieldNames The comma-separated list of (date) fields in the CSV file to date shift
 * @param int $lowerBoundDays The maximum number of days to shift a date backward
 * @param int $upperBoundDays The maximum number of days to shift a date forward
 * @param string $contextFieldName (Optional) The column to determine date shift amount based on
 * @param string $keyName (Optional) The name of the Cloud KMS key used to encrypt ('wrap') the AES-256 key
 * @param string $wrappedKey (Optional) The encrypted ('wrapped') AES-256 key to use when shifting dates
 */
function deidentify_dates(
    string $callingProjectId,
    string $inputCsvFile,
    string $outputCsvFile,
    string $dateFieldNames,
    int $lowerBoundDays,
    int $upperBoundDays,
    string $contextFieldName = '',
    string $keyName = '',
    string $wrappedKey = ''
): void {
    // Instantiate a client.
    $dlp = new DlpServiceClient();

    // Read a CSV file
    $csvLines = file($inputCsvFile, FILE_IGNORE_NEW_LINES);
    $csvHeaders = explode(',', $csvLines[0]);
    $csvRows = array_slice($csvLines, 1);

    // Convert CSV file into protobuf objects
    $tableHeaders = array_map(function ($csvHeader) {
        return (new FieldId)->setName($csvHeader);
    }, $csvHeaders);
    $tableRows = array_map(function ($csvRow) {
        $rowValues = array_map(function ($csvValue) {
            // Values matching m/d/Y become proto Dates; everything else stays a string.
            if ($csvDate = DateTime::createFromFormat('m/d/Y', $csvValue)) {
                $date = (new Date())
                    ->setYear((int) $csvDate->format('Y'))
                    ->setMonth((int) $csvDate->format('m'))
                    ->setDay((int) $csvDate->format('d'));
                return (new Value())
                    ->setDateValue($date);
            } else {
                return (new Value())
                    ->setStringValue($csvValue);
            }
        }, explode(',', $csvRow));
        return (new Row())
            ->setValues($rowValues);
    }, $csvRows);

    // Convert date fields into protobuf objects
    $dateFields = array_map(function ($dateFieldName) {
        return (new FieldId())->setName($dateFieldName);
    }, explode(',', $dateFieldNames));

    // Construct the table object
    $table = (new Table())
        ->setHeaders($tableHeaders)
        ->setRows($tableRows);
    $item = (new ContentItem())
        ->setTable($table);

    // Construct dateShiftConfig
    $dateShiftConfig = (new DateShiftConfig())
        ->setLowerBoundDays($lowerBoundDays)
        ->setUpperBoundDays($upperBoundDays);

    if ($contextFieldName && $keyName && $wrappedKey) {
        $contextField = (new FieldId())
            ->setName($contextFieldName);

        // Create the wrapped crypto key configuration object
        $kmsWrappedCryptoKey = (new KmsWrappedCryptoKey())
            ->setWrappedKey(base64_decode($wrappedKey))
            ->setCryptoKeyName($keyName);
        $cryptoKey = (new CryptoKey())
            ->setKmsWrapped($kmsWrappedCryptoKey);
        $dateShiftConfig
            ->setContext($contextField)
            ->setCryptoKey($cryptoKey);
    } elseif ($contextFieldName || $keyName || $wrappedKey) {
        throw new Exception('You must set either ALL or NONE of {$contextFieldName, $keyName, $wrappedKey}!');
    }

    // Create the information transform configuration objects
    $primitiveTransformation = (new PrimitiveTransformation())
        ->setDateShiftConfig($dateShiftConfig);
    $fieldTransformation = (new FieldTransformation())
        ->setPrimitiveTransformation($primitiveTransformation)
        ->setFields($dateFields);
    $recordTransformations = (new RecordTransformations())
        ->setFieldTransformations([$fieldTransformation]);

    // Create the deidentification configuration object
    $deidentifyConfig = (new DeidentifyConfig())
        ->setRecordTransformations($recordTransformations);

    $parent = "projects/$callingProjectId/locations/global";

    // Run request
    $deidentifyContentRequest = (new DeidentifyContentRequest())
        ->setParent($parent)
        ->setDeidentifyConfig($deidentifyConfig)
        ->setItem($item);
    $response = $dlp->deidentifyContent($deidentifyContentRequest);

    // Check for errors
    foreach ($response->getOverview()->getTransformationSummaries() as $summary) {
        foreach ($summary->getResults() as $result) {
            if ($details = $result->getDetails()) {
                printf('Error: %s' . PHP_EOL, $details);
                return;
            }
        }
    }

    // Save the results to a file
    $csvRef = fopen($outputCsvFile, 'w');
    fputcsv($csvRef, $csvHeaders);
    foreach ($response->getItem()->getTable()->getRows() as $tableRow) {
        $values = array_map(function ($tableValue) {
            if ($tableValue->getStringValue()) {
                return $tableValue->getStringValue();
            }
            $protoDate = $tableValue->getDateValue();
            $date = mktime(0, 0, 0, $protoDate->getMonth(), $protoDate->getDay(), $protoDate->getYear());
            // date('m/d/y') matches the old strftime('%D') output; strftime()
            // is deprecated as of PHP 8.1.
            return date('m/d/y', $date);
        }, iterator_to_array($tableRow->getValues()));
        fputcsv($csvRef, $values);
    }
    fclose($csvRef);
    printf('Deidentified dates written to %s' . PHP_EOL, $outputCsvFile);
}
Python
To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries .
To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment .
import
base64
import
csv
from
datetime
import
datetime
from
typing
import
List
import
google.cloud.dlp
from
google.cloud.dlp_v2
import
types
def deidentify_with_date_shift(
    project: str,
    input_csv_file: str = None,
    output_csv_file: str = None,
    date_fields: List[str] = None,
    lower_bound_days: int = None,
    upper_bound_days: int = None,
    context_field_id: str = None,
    wrapped_key: str = None,
    key_name: str = None,
) -> None:
    """Uses the Data Loss Prevention API to deidentify dates in a CSV file by
    pseudorandomly shifting them.

    Args:
        project: The Google Cloud project id to use as a parent resource.
        input_csv_file: The path to the CSV file to deidentify. The first row
            of the file must specify column names, and all other rows must
            contain valid values.
        output_csv_file: The path to save the date-shifted CSV file.
        date_fields: The list of (date) fields in the CSV file to date shift.
            Example: ['birth_date', 'register_date']
        lower_bound_days: The maximum number of days to shift a date backward
        upper_bound_days: The maximum number of days to shift a date forward
        context_field_id: (Optional) The column to determine date shift amount
            based on. If this is not specified, a random shift amount will be
            used for every row. If this is specified, then 'wrappedKey' and
            'keyName' must also be set. Example:
            contextFieldId = [{ 'name': 'user_id' }]
        key_name: (Optional) The name of the Cloud KMS key used to encrypt
            ('wrap') the AES-256 key. Example:
            key_name = 'projects/YOUR_GCLOUD_PROJECT/locations/YOUR_LOCATION/
            keyRings/YOUR_KEYRING_NAME/cryptoKeys/YOUR_KEY_NAME'
        wrapped_key: (Optional) The encrypted ('wrapped') AES-256 key to use.
            This key should be encrypted using the Cloud KMS key specified by
            key_name.

    Returns:
        None; the response from the API is printed to the terminal.
    """
    # Instantiate a client
    dlp = google.cloud.dlp_v2.DlpServiceClient()

    # Convert the project id into a full resource id.
    parent = f"projects/{project}/locations/global"

    # Convert date field list to Protobuf type
    def map_fields(field: str) -> dict:
        return {"name": field}

    if date_fields:
        date_fields = map(map_fields, date_fields)
    else:
        date_fields = []

    # Load the whole CSV (header row first) into memory.
    f = []
    with open(input_csv_file) as csvfile:
        reader = csv.reader(csvfile)
        for row in reader:
            f.append(row)

    # Helper function for converting CSV rows to Protobuf types
    def map_headers(header: str) -> dict:
        return {"name": header}

    def map_data(value: str) -> dict:
        # Cells that parse as m/d/Y become date_value dicts; everything
        # else is passed through as a string_value.
        try:
            date = datetime.strptime(value, "%m/%d/%Y")
            return {
                "date_value": {
                    "year": date.year,
                    "month": date.month,
                    "day": date.day,
                }
            }
        except ValueError:
            return {"string_value": value}

    def map_rows(row: str) -> dict:
        return {"values": map(map_data, row)}

    # Using the helper functions, convert CSV rows to protobuf-compatible
    # dictionaries.
    csv_headers = map(map_headers, f[0])
    csv_rows = map(map_rows, f[1:])

    # Construct the table dict
    table_item = {"table": {"headers": csv_headers, "rows": csv_rows}}

    # Construct date shift config
    date_shift_config = {
        "lower_bound_days": lower_bound_days,
        "upper_bound_days": upper_bound_days,
    }

    # If using a Cloud KMS key, add it to the date_shift_config.
    # The wrapped key is base64-encoded, but the library expects a binary
    # string, so decode it here.
    if context_field_id and key_name and wrapped_key:
        date_shift_config["context"] = {"name": context_field_id}
        date_shift_config["crypto_key"] = {
            "kms_wrapped": {
                "wrapped_key": base64.b64decode(wrapped_key),
                "crypto_key_name": key_name,
            }
        }
    elif context_field_id or key_name or wrapped_key:
        raise ValueError(
            """You must set either ALL or NONE of
[context_field_id, key_name, wrapped_key]!"""
        )

    # Construct Deidentify Config
    deidentify_config = {
        "record_transformations": {
            "field_transformations": [
                {
                    "fields": date_fields,
                    "primitive_transformation": {
                        "date_shift_config": date_shift_config
                    },
                }
            ]
        }
    }

    # Write to CSV helper methods
    def write_header(header: types.storage.FieldId) -> str:
        return header.name

    def write_data(data: types.storage.Value) -> str:
        # Dates come back as proto Date messages; render them as m/d/Y.
        return data.string_value or "{}/{}/{}".format(
            data.date_value.month,
            data.date_value.day,
            data.date_value.year,
        )

    # Call the API
    response = dlp.deidentify_content(
        request={
            "parent": parent,
            "deidentify_config": deidentify_config,
            "item": table_item,
        }
    )

    # Write results to CSV file
    with open(output_csv_file, "w") as csvfile:
        write_file = csv.writer(csvfile, delimiter=",")
        write_file.writerow(map(write_header, response.item.table.headers))
        for row in response.item.table.rows:
            write_file.writerow(map(write_data, row.values))

    # Print status
    print(f"Successfully saved date-shift output to {output_csv_file}")
What's next
To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser .

