Compute l-diversity

Compute l-diversity with Cloud DLP. L-diversity, which is an extension of k-anonymity, measures the diversity of sensitive values for each column in which they occur. A dataset has l-diversity if, for every set of rows with identical quasi-identifiers, there are at least l distinct values for each sensitive attribute.

Explore further

For detailed documentation that includes this code sample, see the following:

Code sample

C#

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries .

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment .

  using 
  
  Google.Api.Gax.ResourceNames 
 
 ; 
 using 
  
  Google.Cloud.Dlp.V2 
 
 ; 
 using 
  
  Google.Cloud.PubSub.V1 
 
 ; 
 using 
  
 Newtonsoft.Json 
 ; 
 using 
  
 System 
 ; 
 using 
  
 System.Collections.Generic 
 ; 
 using 
  
 System.Linq 
 ; 
 using 
  
 System.Threading 
 ; 
 using 
  
 System.Threading.Tasks 
 ; 
 using 
  
 static 
  
 Google 
 . 
 Cloud 
 . 
 Dlp 
 . 
 V2 
 . 
 Action 
 . 
 Types 
 ; 
 using 
  
 static 
  
 Google 
 . 
 Cloud 
 . 
 Dlp 
 . 
 V2 
 . 
 PrivacyMetric 
 . 
 Types 
 ; 
 public 
  
 class 
  
 RiskAnalysisCreateLDiversity 
 { 
  
 public 
  
 static 
  
 object 
  
 LDiversity 
 ( 
  
 string 
  
 callingProjectId 
 , 
  
 string 
  
 tableProjectId 
 , 
  
 string 
  
 datasetId 
 , 
  
 string 
  
 tableId 
 , 
  
 string 
  
 topicId 
 , 
  
 string 
  
 subscriptionId 
 , 
  
 IEnumerable<FieldId> 
  
 quasiIds 
 , 
  
 string 
  
 sensitiveAttribute 
 ) 
  
 { 
  
 var 
  
 dlp 
  
 = 
  
  DlpServiceClient 
 
 . 
  Create 
 
 (); 
  
 // Construct + submit the job 
  
 var 
  
 ldiversityConfig 
  
 = 
  
 new 
  
 LDiversityConfig 
  
 { 
  
 SensitiveAttribute 
  
 = 
  
 new 
  
  FieldId 
 
  
 { 
  
 Name 
  
 = 
  
 sensitiveAttribute 
  
 }, 
  
 QuasiIds 
  
 = 
  
 { 
  
 quasiIds 
  
 } 
  
 }; 
  
 var 
  
 config 
  
 = 
  
 new 
  
  RiskAnalysisJobConfig 
 
  
 { 
  
 PrivacyMetric 
  
 = 
  
 new 
  
  PrivacyMetric 
 
  
 { 
  
 LDiversityConfig 
  
 = 
  
 ldiversityConfig 
  
 }, 
  
 SourceTable 
  
 = 
  
 new 
  
  BigQueryTable 
 
  
 { 
  
 ProjectId 
  
 = 
  
 tableProjectId 
 , 
  
 DatasetId 
  
 = 
  
 datasetId 
 , 
  
 TableId 
  
 = 
  
 tableId 
  
 }, 
  
 Actions 
  
 = 
  
 { 
  
 new 
  
 Google 
 . 
 Cloud 
 . 
 Dlp 
 . 
 V2 
 . 
 Action 
  
 { 
  
 PubSub 
  
 = 
  
 new 
  
  PublishToPubSub 
 
  
 { 
  
 Topic 
  
 = 
  
 $"projects/{callingProjectId}/topics/{topicId}" 
  
 } 
  
 } 
  
 } 
  
 }; 
  
 var 
  
 submittedJob 
  
 = 
  
 dlp 
 . 
 CreateDlpJob 
 ( 
  
 new 
  
  CreateDlpJobRequest 
 
  
 { 
  
 ParentAsProjectName 
  
 = 
  
 new 
  
  ProjectName 
 
 ( 
 callingProjectId 
 ), 
  
 RiskJob 
  
 = 
  
 config 
  
 }); 
  
 // Listen to pub/sub for the job 
  
 var 
  
 subscriptionName 
  
 = 
  
 new 
  
  SubscriptionName 
 
 ( 
 callingProjectId 
 , 
  
 subscriptionId 
 ); 
  
 var 
  
 subscriber 
  
 = 
  
  SubscriberClient 
 
 . 
  CreateAsync 
 
 ( 
 subscriptionName 
 ). 
 Result 
 ; 
  
 // SimpleSubscriber runs your message handle function on multiple 
  
 // threads to maximize throughput. 
  
 var 
  
 done 
  
 = 
  
 new 
  
 ManualResetEventSlim 
 ( 
 false 
 ); 
  
 subscriber 
 . 
 StartAsync 
 (( 
  PubsubMessage 
 
  
 message 
 , 
  
 CancellationToken 
  
 cancel 
 ) 
  
 = 
>  
 { 
  
 if 
  
 ( 
 message 
 . 
 Attributes 
 [ 
 "DlpJobName" 
 ] 
  
 == 
  
 submittedJob 
 . 
 Name 
 ) 
  
 { 
  
 Thread 
 . 
 Sleep 
 ( 
 500 
 ); 
  
 // Wait for DLP API results to become consistent 
  
 done 
 . 
 Set 
 (); 
  
 return 
  
 Task 
 . 
 FromResult 
 ( 
  SubscriberClient 
 
 . 
  Reply 
 
 . 
  Ack 
 
 ); 
  
 } 
  
 else 
  
 { 
  
 return 
  
 Task 
 . 
 FromResult 
 ( 
  SubscriberClient 
 
 . 
  Reply 
 
 . 
  Nack 
 
 ); 
  
 } 
  
 }); 
  
 done 
 . 
 Wait 
 ( 
 TimeSpan 
 . 
 FromMinutes 
 ( 
 10 
 )); 
  
 // 10 minute timeout; may not work for large jobs 
  
 subscriber 
 . 
 StopAsync 
 ( 
 CancellationToken 
 . 
 None 
 ). 
 Wait 
 (); 
  
 // Process results 
  
 var 
  
 resultJob 
  
 = 
  
 dlp 
 . 
 GetDlpJob 
 ( 
  
 new 
  
  GetDlpJobRequest 
 
  
 { 
  
 DlpJobName 
  
 = 
  
  DlpJobName 
 
 . 
  Parse 
 
 ( 
 submittedJob 
 . 
 Name 
 ) 
  
 }); 
  
 var 
  
 result 
  
 = 
  
 resultJob 
 . 
 RiskDetails 
 . 
 LDiversityResult 
 ; 
  
 for 
  
 ( 
 var 
  
 bucketIdx 
  
 = 
  
 0 
 ; 
  
 bucketIdx 
 < 
 result 
 . 
 SensitiveValueFrequencyHistogramBuckets 
 . 
 Count 
 ; 
  
 bucketIdx 
 ++ 
 ) 
  
 { 
  
 var 
  
 bucket 
  
 = 
  
 result 
 . 
  SensitiveValueFrequencyHistogramBuckets 
 
 [ 
 bucketIdx 
 ]; 
  
 Console 
 . 
 WriteLine 
 ( 
 $"Bucket {bucketIdx}" 
 ); 
  
 Console 
 . 
 WriteLine 
 ( 
 $"  Bucket size range: [{bucket. SensitiveValueFrequencyLowerBound 
}, {bucket. SensitiveValueFrequencyUpperBound 
}]." 
 ); 
  
 Console 
 . 
 WriteLine 
 ( 
 $"  {bucket.BucketSize} unique value(s) total." 
 ); 
  
 foreach 
  
 ( 
 var 
  
 bucketValue 
  
 in 
  
 bucket 
 . 
 BucketValues 
 ) 
  
 { 
  
 // 'UnpackValue(x)' is a prettier version of 'x.toString()' 
  
 Console 
 . 
 WriteLine 
 ( 
 $"    Quasi-ID values: [{String.Join(',', bucketValue.QuasiIdsValues.Select(x => UnpackValue(x)))}]" 
 ); 
  
 Console 
 . 
 WriteLine 
 ( 
 $"    Class size: {bucketValue.EquivalenceClassSize}" 
 ); 
  
 foreach 
  
 ( 
 var 
  
 topValue 
  
 in 
  
 bucketValue 
 . 
  TopSensitiveValues 
 
 ) 
  
 { 
  
 Console 
 . 
 WriteLine 
 ( 
 $"    Sensitive value {UnpackValue(topValue. Value 
)} occurs {topValue.Count} time(s)." 
 ); 
  
 } 
  
 } 
  
 } 
  
 return 
  
 result 
 ; 
  
 } 
  
 public 
  
 static 
  
 string 
  
 UnpackValue 
 ( 
  Value 
 
  
 protoValue 
 ) 
  
 { 
  
 var 
  
 jsonValue 
  
 = 
  
 JsonConvert 
 . 
 DeserializeObject<Dictionary<string 
 , 
  
 object 
>> ( 
 protoValue 
 . 
 ToString 
 ()); 
  
 return 
  
 jsonValue 
 . 
  Values 
 
 . 
 ElementAt 
 ( 
 0 
 ). 
 ToString 
 (); 
  
 } 
 } 
 

Go

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries .

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment .

  import 
  
 ( 
  
 "context" 
  
 "fmt" 
  
 "io" 
  
 "strings" 
  
 "time" 
  
 dlp 
  
 "cloud.google.com/go/dlp/apiv2" 
  
 "cloud.google.com/go/dlp/apiv2/dlppb" 
  
 "cloud.google.com/go/pubsub" 
 ) 
 // riskLDiversity computes the L Diversity of the given columns. 
 func 
  
 riskLDiversity 
 ( 
 w 
  
 io 
 . 
 Writer 
 , 
  
 projectID 
 , 
  
 dataProject 
 , 
  
 pubSubTopic 
 , 
  
 pubSubSub 
 , 
  
 datasetID 
 , 
  
 tableID 
 , 
  
 sensitiveAttribute 
  
 string 
 , 
  
 columnNames 
  
 ... 
 string 
 ) 
  
 error 
  
 { 
  
 // projectID := "my-project-id" 
  
 // dataProject := "bigquery-public-data" 
  
 // pubSubTopic := "dlp-risk-sample-topic" 
  
 // pubSubSub := "dlp-risk-sample-sub" 
  
 // datasetID := "nhtsa_traffic_fatalities" 
  
 // tableID := "accident_2015" 
  
 // sensitiveAttribute := "city" 
  
 // columnNames := "state_number", "county" 
  
 ctx 
  
 := 
  
 context 
 . 
 Background 
 () 
  
 client 
 , 
  
 err 
  
 := 
  
 dlp 
 . 
 NewClient 
 ( 
 ctx 
 ) 
  
 if 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 return 
  
 fmt 
 . 
 Errorf 
 ( 
 "dlp.NewClient: %w" 
 , 
  
 err 
 ) 
  
 } 
  
 defer 
  
 client 
 . 
 Close 
 () 
  
 // Create a PubSub Client used to listen for when the inspect job finishes. 
  
 pubsubClient 
 , 
  
 err 
  
 := 
  
 pubsub 
 . 
 NewClient 
 ( 
 ctx 
 , 
  
 projectID 
 ) 
  
 if 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 return 
  
 err 
  
 } 
  
 defer 
  
 pubsubClient 
 . 
 Close 
 () 
  
 // Create a PubSub subscription we can use to listen for messages. 
  
 // Create the Topic if it doesn't exist. 
  
 t 
  
 := 
  
 pubsubClient 
 . 
 Topic 
 ( 
 pubSubTopic 
 ) 
  
 topicExists 
 , 
  
 err 
  
 := 
  
 t 
 . 
 Exists 
 ( 
 ctx 
 ) 
  
 if 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 return 
  
 err 
  
 } 
  
 if 
  
 ! 
 topicExists 
  
 { 
  
 if 
  
 t 
 , 
  
 err 
  
 = 
  
 pubsubClient 
 . 
 CreateTopic 
 ( 
 ctx 
 , 
  
 pubSubTopic 
 ); 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 return 
  
 err 
  
 } 
  
 } 
  
 // Create the Subscription if it doesn't exist. 
  
 s 
  
 := 
  
 pubsubClient 
 . 
 Subscription 
 ( 
 pubSubSub 
 ) 
  
 subExists 
 , 
  
 err 
  
 := 
  
 s 
 . 
 Exists 
 ( 
 ctx 
 ) 
  
 if 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 return 
  
 err 
  
 } 
  
 if 
  
 ! 
 subExists 
  
 { 
  
 if 
  
 s 
 , 
  
 err 
  
 = 
  
 pubsubClient 
 . 
 CreateSubscription 
 ( 
 ctx 
 , 
  
 pubSubSub 
 , 
  
 pubsub 
 . 
 SubscriptionConfig 
 { 
 Topic 
 : 
  
 t 
 }); 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 return 
  
 err 
  
 } 
  
 } 
  
 // topic is the PubSub topic string where messages should be sent. 
  
 topic 
  
 := 
  
 "projects/" 
  
 + 
  
 projectID 
  
 + 
  
 "/topics/" 
  
 + 
  
 pubSubTopic 
  
 // Build the QuasiID slice. 
  
 var 
  
 q 
  
 [] 
 * 
 dlppb 
 . 
 FieldId 
  
 for 
  
 _ 
 , 
  
 c 
  
 := 
  
 range 
  
 columnNames 
  
 { 
  
 q 
  
 = 
  
 append 
 ( 
 q 
 , 
  
& dlppb 
 . 
 FieldId 
 { 
 Name 
 : 
  
 c 
 }) 
  
 } 
  
 // Create a configured request. 
  
 req 
  
 := 
  
& dlppb 
 . 
 CreateDlpJobRequest 
 { 
  
 Parent 
 : 
  
 fmt 
 . 
 Sprintf 
 ( 
 "projects/%s/locations/global" 
 , 
  
 projectID 
 ), 
  
 Job 
 : 
  
& dlppb 
 . 
 CreateDlpJobRequest_RiskJob 
 { 
  
 RiskJob 
 : 
  
& dlppb 
 . 
 RiskAnalysisJobConfig 
 { 
  
 // PrivacyMetric configures what to compute. 
  
 PrivacyMetric 
 : 
  
& dlppb 
 . 
 PrivacyMetric 
 { 
  
 Type 
 : 
  
& dlppb 
 . 
 PrivacyMetric_LDiversityConfig_ 
 { 
  
 LDiversityConfig 
 : 
  
& dlppb 
 . 
 PrivacyMetric_LDiversityConfig 
 { 
  
 QuasiIds 
 : 
  
 q 
 , 
  
 SensitiveAttribute 
 : 
  
& dlppb 
 . 
 FieldId 
 { 
  
 Name 
 : 
  
 sensitiveAttribute 
 , 
  
 }, 
  
 }, 
  
 }, 
  
 }, 
  
 // SourceTable describes where to find the data. 
  
 SourceTable 
 : 
  
& dlppb 
 . 
 BigQueryTable 
 { 
  
 ProjectId 
 : 
  
 dataProject 
 , 
  
 DatasetId 
 : 
  
 datasetID 
 , 
  
 TableId 
 : 
  
 tableID 
 , 
  
 }, 
  
 // Send a message to PubSub using Actions. 
  
 Actions 
 : 
  
 [] 
 * 
 dlppb 
 . 
 Action 
 { 
  
 { 
  
 Action 
 : 
  
& dlppb 
 . 
 Action_PubSub 
 { 
  
 PubSub 
 : 
  
& dlppb 
 . 
 Action_PublishToPubSub 
 { 
  
 Topic 
 : 
  
 topic 
 , 
  
 }, 
  
 }, 
  
 }, 
  
 }, 
  
 }, 
  
 }, 
  
 } 
  
 // Create the risk job. 
  
 j 
 , 
  
 err 
  
 := 
  
 client 
 . 
 CreateDlpJob 
 ( 
 ctx 
 , 
  
 req 
 ) 
  
 if 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 return 
  
 fmt 
 . 
 Errorf 
 ( 
 "CreateDlpJob: %w" 
 , 
  
 err 
 ) 
  
 } 
  
 fmt 
 . 
 Fprintf 
 ( 
 w 
 , 
  
 "Created job: %v\n" 
 , 
  
 j 
 . 
 GetName 
 ()) 
  
 // Wait for the risk job to finish by waiting for a PubSub message. 
  
 // This only waits for 10 minutes. For long jobs, consider using a truly 
  
 // asynchronous execution model such as Cloud Functions. 
  
 ctx 
 , 
  
 cancel 
  
 := 
  
 context 
 . 
 WithTimeout 
 ( 
 ctx 
 , 
  
 10 
 * 
 time 
 . 
 Minute 
 ) 
  
 defer 
  
 cancel 
 () 
  
 err 
  
 = 
  
 s 
 . 
 Receive 
 ( 
 ctx 
 , 
  
 func 
 ( 
 ctx 
  
 context 
 . 
 Context 
 , 
  
 msg 
  
 * 
 pubsub 
 . 
 Message 
 ) 
  
 { 
  
 // If this is the wrong job, do not process the result. 
  
 if 
  
 msg 
 . 
 Attributes 
 [ 
 "DlpJobName" 
 ] 
  
 != 
  
 j 
 . 
 GetName 
 () 
  
 { 
  
 msg 
 . 
 Nack 
 () 
  
 return 
  
 } 
  
 msg 
 . 
 Ack 
 () 
  
 time 
 . 
 Sleep 
 ( 
 500 
  
 * 
  
 time 
 . 
 Millisecond 
 ) 
  
 j 
 , 
  
 err 
  
 := 
  
 client 
 . 
 GetDlpJob 
 ( 
 ctx 
 , 
  
& dlppb 
 . 
 GetDlpJobRequest 
 { 
  
 Name 
 : 
  
 j 
 . 
 GetName 
 (), 
  
 }) 
  
 if 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 fmt 
 . 
 Fprintf 
 ( 
 w 
 , 
  
 "GetDlpJob: %v" 
 , 
  
 err 
 ) 
  
 return 
  
 } 
  
 h 
  
 := 
  
 j 
 . 
 GetRiskDetails 
 (). 
 GetLDiversityResult 
 (). 
 GetSensitiveValueFrequencyHistogramBuckets 
 () 
  
 for 
  
 i 
 , 
  
 b 
  
 := 
  
 range 
  
 h 
  
 { 
  
 fmt 
 . 
 Fprintf 
 ( 
 w 
 , 
  
 "Histogram bucket %v\n" 
 , 
  
 i 
 ) 
  
 fmt 
 . 
 Fprintf 
 ( 
 w 
 , 
  
 "  Size range: [%v,%v]\n" 
 , 
  
 b 
 . 
 GetSensitiveValueFrequencyLowerBound 
 (), 
  
 b 
 . 
 GetSensitiveValueFrequencyUpperBound 
 ()) 
  
 fmt 
 . 
 Fprintf 
 ( 
 w 
 , 
  
 "  %v unique values total\n" 
 , 
  
 b 
 . 
 GetBucketSize 
 ()) 
  
 for 
  
 _ 
 , 
  
 v 
  
 := 
  
 range 
  
 b 
 . 
 GetBucketValues 
 () 
  
 { 
  
 var 
  
 qvs 
  
 [] 
 string 
  
 for 
  
 _ 
 , 
  
 qv 
  
 := 
  
 range 
  
 v 
 . 
 GetQuasiIdsValues 
 () 
  
 { 
  
 qvs 
  
 = 
  
 append 
 ( 
 qvs 
 , 
  
 qv 
 . 
 String 
 ()) 
  
 } 
  
 fmt 
 . 
 Fprintf 
 ( 
 w 
 , 
  
 "    QuasiID values: %s\n" 
 , 
  
 strings 
 . 
 Join 
 ( 
 qvs 
 , 
  
 ", " 
 )) 
  
 fmt 
 . 
 Fprintf 
 ( 
 w 
 , 
  
 "    Class size: %v\n" 
 , 
  
 v 
 . 
 GetEquivalenceClassSize 
 ()) 
  
 for 
  
 _ 
 , 
  
 sv 
  
 := 
  
 range 
  
 v 
 . 
 GetTopSensitiveValues 
 () 
  
 { 
  
 fmt 
 . 
 Fprintf 
 ( 
 w 
 , 
  
 "    Sensitive value %v occurs %v times\n" 
 , 
  
 sv 
 . 
 GetValue 
 (), 
  
 sv 
 . 
 GetCount 
 ()) 
  
 } 
  
 } 
  
 } 
  
 // Stop listening for more messages. 
  
 cancel 
 () 
  
 }) 
  
 if 
  
 err 
  
 != 
  
 nil 
  
 { 
  
 return 
  
 fmt 
 . 
 Errorf 
 ( 
 "Recieve: %w" 
 , 
  
 err 
 ) 
  
 } 
  
 return 
  
 nil 
 } 
 

Java

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries .

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment .

  import 
  
 com.google.api.core. SettableApiFuture 
 
 ; 
 import 
  
 com.google.cloud.dlp.v2. DlpServiceClient 
 
 ; 
 import 
  
 com.google.cloud.dlp.v2. DlpServiceSettings 
 
 ; 
 import 
  
 com.google.cloud.pubsub.v1. AckReplyConsumer 
 
 ; 
 import 
  
 com.google.cloud.pubsub.v1. MessageReceiver 
 
 ; 
 import 
  
 com.google.cloud.pubsub.v1. Subscriber 
 
 ; 
 import 
  
 com.google.privacy.dlp.v2. Action 
 
 ; 
 import 
  
 com.google.privacy.dlp.v2. Action 
. PublishToPubSub 
 
 ; 
 import 
  
 com.google.privacy.dlp.v2. AnalyzeDataSourceRiskDetails 
. LDiversityResult 
 
 ; 
 import 
  
 com.google.privacy.dlp.v2. AnalyzeDataSourceRiskDetails 
. LDiversityResult 
. LDiversityEquivalenceClass 
 
 ; 
 import 
  
 com.google.privacy.dlp.v2. AnalyzeDataSourceRiskDetails 
. LDiversityResult 
. LDiversityHistogramBucket 
 
 ; 
 import 
  
 com.google.privacy.dlp.v2. BigQueryTable 
 
 ; 
 import 
  
 com.google.privacy.dlp.v2. CreateDlpJobRequest 
 
 ; 
 import 
  
 com.google.privacy.dlp.v2. DlpJob 
 
 ; 
 import 
  
 com.google.privacy.dlp.v2. FieldId 
 
 ; 
 import 
  
 com.google.privacy.dlp.v2. GetDlpJobRequest 
 
 ; 
 import 
  
 com.google.privacy.dlp.v2. LocationName 
 
 ; 
 import 
  
 com.google.privacy.dlp.v2. PrivacyMetric 
 
 ; 
 import 
  
 com.google.privacy.dlp.v2. PrivacyMetric 
. LDiversityConfig 
 
 ; 
 import 
  
 com.google.privacy.dlp.v2. RiskAnalysisJobConfig 
 
 ; 
 import 
  
 com.google.privacy.dlp.v2. Value 
 
 ; 
 import 
  
 com.google.privacy.dlp.v2. ValueFrequency 
 
 ; 
 import 
  
 com.google.pubsub.v1. ProjectSubscriptionName 
 
 ; 
 import 
  
 com.google.pubsub.v1. ProjectTopicName 
 
 ; 
 import 
  
 com.google.pubsub.v1. PubsubMessage 
 
 ; 
 import 
  
 java.io.IOException 
 ; 
 import 
  
 java.util.Arrays 
 ; 
 import 
  
 java.util.List 
 ; 
 import 
  
 java.util.concurrent.ExecutionException 
 ; 
 import 
  
 java.util.concurrent.TimeUnit 
 ; 
 import 
  
 java.util.concurrent.TimeoutException 
 ; 
 import 
  
 java.util.stream.Collectors 
 ; 
 import 
  
 org.threeten.bp.Duration 
 ; 
 @SuppressWarnings 
 ( 
 "checkstyle:AbbreviationAsWordInName" 
 ) 
 class 
 RiskAnalysisLDiversity 
  
 { 
  
 public 
  
 static 
  
 void 
  
 main 
 ( 
 String 
 [] 
  
 args 
 ) 
  
 throws 
  
 Exception 
  
 { 
  
 // TODO(developer): Replace these variables before running the sample. 
  
 String 
  
 projectId 
  
 = 
  
 "your-project-id" 
 ; 
  
 String 
  
 datasetId 
  
 = 
  
 "your-bigquery-dataset-id" 
 ; 
  
 String 
  
 tableId 
  
 = 
  
 "your-bigquery-table-id" 
 ; 
  
 String 
  
 topicId 
  
 = 
  
 "pub-sub-topic" 
 ; 
  
 String 
  
 subscriptionId 
  
 = 
  
 "pub-sub-subscription" 
 ; 
  
 calculateLDiversity 
 ( 
 projectId 
 , 
  
 datasetId 
 , 
  
 tableId 
 , 
  
 topicId 
 , 
  
 subscriptionId 
 ); 
  
 } 
  
 public 
  
 static 
  
 void 
  
 calculateLDiversity 
 ( 
  
 String 
  
 projectId 
 , 
  
 String 
  
 datasetId 
 , 
  
 String 
  
 tableId 
 , 
  
 String 
  
 topicId 
 , 
  
 String 
  
 subscriptionId 
 ) 
  
 throws 
  
 ExecutionException 
 , 
  
 InterruptedException 
 , 
  
 IOException 
  
 { 
  
 // Initialize client that will be used to send requests. This client only needs to be created 
  
 // once, and can be reused for multiple requests. After completing all of your requests, call 
  
 // the "close" method on the client to safely clean up any remaining background resources. 
  
  DlpServiceSettings 
 
 . 
 Builder 
  
 dlpServiceSettingsBuilder 
  
 = 
  
  DlpServiceSettings 
 
 . 
 newBuilder 
 (); 
  
 dlpServiceSettingsBuilder 
  
 . 
 getDlpJobSettings 
 () 
  
 . 
  setRetrySettings 
 
 ( 
  
 dlpServiceSettingsBuilder 
  
 . 
 getDlpJobSettings 
 () 
  
 . 
 getRetrySettings 
 () 
  
 . 
 toBuilder 
 () 
  
 . 
 setTotalTimeout 
 ( 
 Duration 
 . 
 ofSeconds 
 ( 
 600 
 )) 
  
 . 
 build 
 ()); 
  
 try 
  
 ( 
  DlpServiceClient 
 
  
 dlpServiceClient 
  
 = 
  
  DlpServiceClient 
 
 . 
 create 
 ( 
 dlpServiceSettingsBuilder 
 . 
 build 
 ())) 
  
 { 
  
 // Specify the BigQuery table to analyze 
  
  BigQueryTable 
 
  
 bigQueryTable 
  
 = 
  
  BigQueryTable 
 
 . 
 newBuilder 
 () 
  
 . 
 setProjectId 
 ( 
 projectId 
 ) 
  
 . 
 setDatasetId 
 ( 
 datasetId 
 ) 
  
 . 
 setTableId 
 ( 
 tableId 
 ) 
  
 . 
 build 
 (); 
  
 // These values represent the column names of quasi-identifiers to analyze 
  
 List<String> 
  
 quasiIds 
  
 = 
  
 Arrays 
 . 
 asList 
 ( 
 "Age" 
 , 
  
 "Mystery" 
 ); 
  
 // This value represents the column name to compare the quasi-identifiers against 
  
 String 
  
 sensitiveAttribute 
  
 = 
  
 "Name" 
 ; 
  
 // Configure the privacy metric for the job 
  
  FieldId 
 
  
 sensitiveAttributeField 
  
 = 
  
  FieldId 
 
 . 
 newBuilder 
 (). 
 setName 
 ( 
 sensitiveAttribute 
 ). 
 build 
 (); 
  
 List<FieldId> 
  
 quasiIdFields 
  
 = 
  
 quasiIds 
 . 
 stream 
 () 
  
 . 
 map 
 ( 
 columnName 
  
 - 
>  
  FieldId 
 
 . 
 newBuilder 
 (). 
 setName 
 ( 
 columnName 
 ). 
 build 
 ()) 
  
 . 
 collect 
 ( 
 Collectors 
 . 
 toList 
 ()); 
  
  LDiversityConfig 
 
  
 ldiversityConfig 
  
 = 
  
  LDiversityConfig 
 
 . 
 newBuilder 
 () 
  
 . 
 addAllQuasiIds 
 ( 
 quasiIdFields 
 ) 
  
 . 
  setSensitiveAttribute 
 
 ( 
 sensitiveAttributeField 
 ) 
  
 . 
 build 
 (); 
  
  PrivacyMetric 
 
  
 privacyMetric 
  
 = 
  
  PrivacyMetric 
 
 . 
 newBuilder 
 (). 
  setLDiversityConfig 
 
 ( 
 ldiversityConfig 
 ). 
 build 
 (); 
  
 // Create action to publish job status notifications over Google Cloud Pub/ 
  
  ProjectTopicName 
 
  
 topicName 
  
 = 
  
  ProjectTopicName 
 
 . 
 of 
 ( 
 projectId 
 , 
  
 topicId 
 ); 
  
  PublishToPubSub 
 
  
 publishToPubSub 
  
 = 
  
  PublishToPubSub 
 
 . 
 newBuilder 
 (). 
 setTopic 
 ( 
 topicName 
 . 
  toString 
 
 ()). 
 build 
 (); 
  
  Action 
 
  
 action 
  
 = 
  
  Action 
 
 . 
 newBuilder 
 (). 
  setPubSub 
 
 ( 
 publishToPubSub 
 ). 
 build 
 (); 
  
 // Configure the risk analysis job to perform 
  
  RiskAnalysisJobConfig 
 
  
 riskAnalysisJobConfig 
  
 = 
  
  RiskAnalysisJobConfig 
 
 . 
 newBuilder 
 () 
  
 . 
  setSourceTable 
 
 ( 
 bigQueryTable 
 ) 
  
 . 
  setPrivacyMetric 
 
 ( 
 privacyMetric 
 ) 
  
 . 
 addActions 
 ( 
 action 
 ) 
  
 . 
 build 
 (); 
  
 // Build the request to be sent by the client 
  
  CreateDlpJobRequest 
 
  
 createDlpJobRequest 
  
 = 
  
  CreateDlpJobRequest 
 
 . 
 newBuilder 
 () 
  
 . 
 setParent 
 ( 
  LocationName 
 
 . 
 of 
 ( 
 projectId 
 , 
  
 "global" 
 ). 
 toString 
 ()) 
  
 . 
  setRiskJob 
 
 ( 
 riskAnalysisJobConfig 
 ) 
  
 . 
 build 
 (); 
  
 // Send the request to the API using the client 
  
  DlpJob 
 
  
 dlpJob 
  
 = 
  
 dlpServiceClient 
 . 
 createDlpJob 
 ( 
 createDlpJobRequest 
 ); 
  
 // Set up a Pub/Sub subscriber to listen on the job completion status 
  
 final 
  
 SettableApiFuture<Boolean> 
  
 done 
  
 = 
  
  SettableApiFuture 
 
 . 
 create 
 (); 
  
  ProjectSubscriptionName 
 
  
 subscriptionName 
  
 = 
  
  ProjectSubscriptionName 
 
 . 
 of 
 ( 
 projectId 
 , 
  
 subscriptionId 
 ); 
  
  MessageReceiver 
 
  
 messageHandler 
  
 = 
  
 ( 
 PubsubMessage 
  
 pubsubMessage 
 , 
  
 AckReplyConsumer 
  
 ackReplyConsumer 
 ) 
  
 - 
>  
 { 
  
 handleMessage 
 ( 
 dlpJob 
 , 
  
 done 
 , 
  
 pubsubMessage 
 , 
  
 ackReplyConsumer 
 ); 
  
 }; 
  
  Subscriber 
 
  
 subscriber 
  
 = 
  
  Subscriber 
 
 . 
 newBuilder 
 ( 
 subscriptionName 
 , 
  
 messageHandler 
 ). 
 build 
 (); 
  
 subscriber 
 . 
  startAsync 
 
 (); 
  
 // Wait for job completion semi-synchronously 
  
 // For long jobs, consider using a truly asynchronous execution model such as Cloud Functions 
  
 try 
  
 { 
  
 done 
 . 
 get 
 ( 
 15 
 , 
  
 TimeUnit 
 . 
 MINUTES 
 ); 
  
 } 
  
 catch 
  
 ( 
 TimeoutException 
  
 e 
 ) 
  
 { 
  
 System 
 . 
 out 
 . 
 println 
 ( 
 "Job was not completed after 15 minutes." 
 ); 
  
 return 
 ; 
  
 } 
  
 finally 
  
 { 
  
 subscriber 
 . 
 stopAsync 
 (); 
  
 subscriber 
 . 
 awaitTerminated 
 (); 
  
 } 
  
 // Build a request to get the completed job 
  
  GetDlpJobRequest 
 
  
 getDlpJobRequest 
  
 = 
  
  GetDlpJobRequest 
 
 . 
 newBuilder 
 (). 
 setName 
 ( 
 dlpJob 
 . 
  getName 
 
 ()). 
 build 
 (); 
  
 // Retrieve completed job status 
  
  DlpJob 
 
  
 completedJob 
  
 = 
  
 dlpServiceClient 
 . 
 getDlpJob 
 ( 
 getDlpJobRequest 
 ); 
  
 System 
 . 
 out 
 . 
 println 
 ( 
 "Job status: " 
  
 + 
  
 completedJob 
 . 
  getState 
 
 ()); 
  
 System 
 . 
 out 
 . 
 println 
 ( 
 "Job name: " 
  
 + 
  
 dlpJob 
 . 
  getName 
 
 ()); 
  
 // Get the result and parse through and process the information 
  
  LDiversityResult 
 
  
 ldiversityResult 
  
 = 
  
 completedJob 
 . 
  getRiskDetails 
 
 (). 
 getLDiversityResult 
 (); 
  
 List<LDiversityHistogramBucket> 
  
 histogramBucketList 
  
 = 
  
 ldiversityResult 
 . 
 getSensitiveValueFrequencyHistogramBucketsList 
 (); 
  
 for 
  
 ( 
  LDiversityHistogramBucket 
 
  
 result 
  
 : 
  
 histogramBucketList 
 ) 
  
 { 
  
 for 
  
 ( 
  LDiversityEquivalenceClass 
 
  
 bucket 
  
 : 
  
 result 
 . 
 getBucketValuesList 
 ()) 
  
 { 
  
 List<String> 
  
 quasiIdValues 
  
 = 
  
 bucket 
 . 
 getQuasiIdsValuesList 
 (). 
 stream 
 () 
  
 . 
 map 
 ( 
 Value 
 :: 
 toString 
 ) 
  
 . 
 collect 
 ( 
 Collectors 
 . 
 toList 
 ()); 
  
 System 
 . 
 out 
 . 
 println 
 ( 
 "\tQuasi-ID values: " 
  
 + 
  
 String 
 . 
 join 
 ( 
 ", " 
 , 
  
 quasiIdValues 
 )); 
  
 System 
 . 
 out 
 . 
 println 
 ( 
 "\tClass size: " 
  
 + 
  
 bucket 
 . 
 getEquivalenceClassSize 
 ()); 
  
 for 
  
 ( 
  ValueFrequency 
 
  
 valueFrequency 
  
 : 
  
 bucket 
 . 
 getTopSensitiveValuesList 
 ()) 
  
 { 
  
 System 
 . 
 out 
 . 
 printf 
 ( 
  
 "\t\tSensitive value %s occurs %d time(s).\n" 
 , 
  
 valueFrequency 
 . 
 getValue 
 (). 
 toString 
 (), 
  
 valueFrequency 
 . 
 getCount 
 ()); 
  
 } 
  
 } 
  
 } 
  
 } 
  
 } 
  
 // handleMessage injects the job and settableFuture into the message reciever interface 
  
 private 
  
 static 
  
 void 
  
 handleMessage 
 ( 
  
  DlpJob 
 
  
 job 
 , 
  
 SettableApiFuture<Boolean> 
  
 done 
 , 
  
  PubsubMessage 
 
  
 pubsubMessage 
 , 
  
  AckReplyConsumer 
 
  
 ackReplyConsumer 
 ) 
  
 { 
  
 String 
  
 messageAttribute 
  
 = 
  
 pubsubMessage 
 . 
  getAttributesMap 
 
 (). 
 get 
 ( 
 "DlpJobName" 
 ); 
  
 if 
  
 ( 
 job 
 . 
  getName 
 
 (). 
 equals 
 ( 
 messageAttribute 
 )) 
  
 { 
  
 done 
 . 
 set 
 ( 
 true 
 ); 
  
  ack 
 
ReplyConsumer . 
  ack 
 
 (); 
  
 } 
  
 else 
  
 { 
  
 ackReplyConsumer 
 . 
  nack 
 
 (); 
  
 } 
  
 } 
 } 
 

Node.js

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries .

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment .

  // Import the Google Cloud client libraries 
 const 
  
 DLP 
  
 = 
  
 require 
 ( 
 ' @google-cloud/dlp 
' 
 ); 
 const 
  
 { 
 PubSub 
 } 
  
 = 
  
 require 
 ( 
 ' @google-cloud/pubsub 
' 
 ); 
 // Instantiates clients 
 const 
  
 dlp 
  
 = 
  
 new 
  
 DLP 
 . 
  DlpServiceClient 
 
 (); 
 const 
  
 pubsub 
  
 = 
  
 new 
  
  PubSub 
 
 (); 
 // The project ID to run the API call under 
 // const projectId = 'my-project'; 
 // The project ID the table is stored under 
 // This may or (for public datasets) may not equal the calling project ID 
 // const tableProjectId = 'my-project'; 
 // The ID of the dataset to inspect, e.g. 'my_dataset' 
 // const datasetId = 'my_dataset'; 
 // The ID of the table to inspect, e.g. 'my_table' 
 // const tableId = 'my_table'; 
 // The name of the Pub/Sub topic to notify once the job completes 
 // TODO(developer): create a Pub/Sub topic to use for this 
 // const topicId = 'MY-PUBSUB-TOPIC' 
 // The name of the Pub/Sub subscription to use when listening for job 
 // completion notifications 
 // TODO(developer): create a Pub/Sub subscription to use for this 
 // const subscriptionId = 'MY-PUBSUB-SUBSCRIPTION' 
 // The column to measure l-diversity relative to, e.g. 'firstName' 
 // const sensitiveAttribute = 'name'; 
 // A set of columns that form a composite key ('quasi-identifiers') 
 // const quasiIds = [{ name: 'age' }, { name: 'city' }]; 
 async 
  
 function 
  
 lDiversityAnalysis 
 () 
  
 { 
  
 const 
  
 sourceTable 
  
 = 
  
 { 
  
 projectId 
 : 
  
 tableProjectId 
 , 
  
 datasetId 
 : 
  
 datasetId 
 , 
  
 tableId 
 : 
  
 tableId 
 , 
  
 }; 
  
 // Construct request for creating a risk analysis job 
  
 const 
  
 request 
  
 = 
  
 { 
  
 parent 
 : 
  
 `projects/ 
 ${ 
 projectId 
 } 
 /locations/global` 
 , 
  
 riskJob 
 : 
  
 { 
  
 privacyMetric 
 : 
  
 { 
  
 lDiversityConfig 
 : 
  
 { 
  
 quasiIds 
 : 
  
 quasiIds 
 , 
  
 sensitiveAttribute 
 : 
  
 { 
  
 name 
 : 
  
 sensitiveAttribute 
 , 
  
 }, 
  
 }, 
  
 }, 
  
 sourceTable 
 : 
  
 sourceTable 
 , 
  
 actions 
 : 
  
 [ 
  
 { 
  
 pubSub 
 : 
  
 { 
  
 topic 
 : 
  
 `projects/ 
 ${ 
 projectId 
 } 
 /topics/ 
 ${ 
 topicId 
 } 
 ` 
 , 
  
 }, 
  
 }, 
  
 ], 
  
 }, 
  
 }; 
  
 // Create helper function for unpacking values 
  
 const 
  
 getValue 
  
 = 
  
 obj 
  
 = 
>  
 obj 
 [ 
 Object 
 . 
 keys 
 ( 
 obj 
 )[ 
 0 
 ]]; 
  
 // Run risk analysis job 
  
 const 
  
 [ 
 topicResponse 
 ] 
  
 = 
  
 await 
  
 pubsub 
 . 
 topic 
 ( 
 topicId 
 ). 
 get 
 (); 
  
 const 
  
 subscription 
  
 = 
  
 await 
  
 topicResponse 
 . 
 subscription 
 ( 
 subscriptionId 
 ); 
  
 const 
  
 [ 
 jobsResponse 
 ] 
  
 = 
  
 await 
  
 dlp 
 . 
 createDlpJob 
 ( 
 request 
 ); 
  
 const 
  
 jobName 
  
 = 
  
 jobsResponse 
 . 
 name 
 ; 
  
 console 
 . 
 log 
 ( 
 `Job created. Job name: 
 ${ 
 jobName 
 } 
 ` 
 ); 
  
 // Watch the Pub/Sub topic until the DLP job finishes 
  
 await 
  
 new 
  
  Promise 
 
 (( 
 resolve 
 , 
  
 reject 
 ) 
  
 = 
>  
 { 
  
 const 
  
 messageHandler 
  
 = 
  
 message 
  
 = 
>  
 { 
  
 if 
  
 ( 
 message 
 . 
 attributes 
 && 
 message 
 . 
 attributes 
 . 
 DlpJobName 
  
 === 
  
 jobName 
 ) 
  
 { 
  
 message 
 . 
 ack 
 (); 
  
 subscription 
 . 
 removeListener 
 ( 
 'message' 
 , 
  
 messageHandler 
 ); 
  
 subscription 
 . 
 removeListener 
 ( 
 'error' 
 , 
  
 errorHandler 
 ); 
  
 resolve 
 ( 
 jobName 
 ); 
  
 } 
  
 else 
  
 { 
  
 message 
 . 
 nack 
 (); 
  
 } 
  
 }; 
  
 const 
  
 errorHandler 
  
 = 
  
 err 
  
 = 
>  
 { 
  
 subscription 
 . 
 removeListener 
 ( 
 'message' 
 , 
  
 messageHandler 
 ); 
  
 subscription 
 . 
 removeListener 
 ( 
 'error' 
 , 
  
 errorHandler 
 ); 
  
 reject 
 ( 
 err 
 ); 
  
 }; 
  
 subscripti on 
 
 . 
  on 
 
 ( 
 'message' 
 , 
  
 messageHandler 
 ); 
  
 subscripti on 
 
 . 
  on 
 
 ( 
 'error' 
 , 
  
 errorHandler 
 ); 
  
 }); 
  
 setTimeout 
 (() 
  
 = 
>  
 { 
  
 console 
 . 
 log 
 ( 
 ' Waiting for DLP job to fully complete' 
 ); 
  
 }, 
  
 500 
 ); 
  
 const 
  
 [ 
 job 
 ] 
  
 = 
  
 await 
  
 dlp 
 . 
 getDlpJob 
 ({ 
 name 
 : 
  
 jobName 
 }); 
  
 const 
  
 histogramBuckets 
  
 = 
  
 job 
 . 
 riskDetails 
 . 
 lDiversityResult 
 . 
 sensitiveValueFrequencyHistogramBuckets 
 ; 
  
 histogramBuckets 
 . 
 forEach 
 (( 
 histogramBucket 
 , 
  
 histogramBucketIdx 
 ) 
  
 = 
>  
 { 
  
 console 
 . 
 log 
 ( 
 `Bucket 
 ${ 
 histogramBucketIdx 
 } 
 :` 
 ); 
  
 console 
 . 
 log 
 ( 
  
 `Bucket size range: [ 
 ${ 
 histogramBucket 
 . 
 sensitiveValueFrequencyLowerBound 
 } 
 , 
 ${ 
 histogramBucket 
 . 
 sensitiveValueFrequencyUpperBound 
 } 
 ]` 
  
 ); 
  
 histogramBucket 
 . 
 bucketValues 
 . 
 forEach 
 ( 
 valueBucket 
  
 = 
>  
 { 
  
 const 
  
 quasiIdValues 
  
 = 
  
 valueBucket 
 . 
 quasiIdsValues 
  
 . 
 map 
 ( 
 getValue 
 ) 
  
 . 
 join 
 ( 
 ', ' 
 ); 
  
 console 
 . 
 log 
 ( 
 `  Quasi-ID values: { 
 ${ 
 quasiIdValues 
 } 
 }` 
 ); 
  
 console 
 . 
 log 
 ( 
 `  Class size: 
 ${ 
 valueBucket 
 . 
 equivalenceClassSize 
 } 
 ` 
 ); 
  
 valueBucket 
 . 
 topSensitiveValues 
 . 
 forEach 
 ( 
 valueObj 
  
 = 
>  
 { 
  
 console 
 . 
 log 
 ( 
  
 `    Sensitive value 
 ${ 
 getValue 
 ( 
 valueObj 
 . 
 value 
 ) 
 } 
 occurs 
 ${ 
  
 valueObj 
 . 
 count 
  
 } 
 time(s).` 
  
 ); 
  
 }); 
  
 }); 
  
 }); 
 } 
 await 
  
 lDiversityAnalysis 
 (); 
 

PHP

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries .

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment .

  use Google\Cloud\Dlp\V2\Action; 
 use Google\Cloud\Dlp\V2\Action\PublishToPubSub; 
 use Google\Cloud\Dlp\V2\BigQueryTable; 
 use Google\Cloud\Dlp\V2\Client\DlpServiceClient; 
 use Google\Cloud\Dlp\V2\CreateDlpJobRequest; 
 use Google\Cloud\Dlp\V2\DlpJob\JobState; 
 use Google\Cloud\Dlp\V2\FieldId; 
 use Google\Cloud\Dlp\V2\GetDlpJobRequest; 
 use Google\Cloud\Dlp\V2\PrivacyMetric; 
 use Google\Cloud\Dlp\V2\PrivacyMetric\LDiversityConfig; 
 use Google\Cloud\Dlp\V2\RiskAnalysisJobConfig; 
 use Google\Cloud\PubSub\PubSubClient; 
/**
 * Computes the l-diversity of a column set in a Google BigQuery table.
 *
 * Submits a DLP risk-analysis job, waits for its completion notification on a
 * Pub/Sub subscription (polling with exponential backoff, 10-minute cap), then
 * prints the sensitive-value frequency histogram from the job results.
 *
 * @param string    $callingProjectId    The project ID to run the API call under
 * @param string    $dataProjectId       The project ID containing the target BigQuery table
 * @param string    $topicId             The name of the Pub/Sub topic to notify once the job completes
 * @param string    $subscriptionId      The name of the Pub/Sub subscription to use when listening for
 *                                       job completion notifications
 * @param string    $datasetId           The ID of the dataset to inspect
 * @param string    $tableId             The ID of the table to inspect
 * @param string    $sensitiveAttribute  The column to measure l-diversity relative to, e.g. "firstName"
 * @param string[]  $quasiIdNames        Array of columns that form a composite key (quasi-identifiers)
 */
function l_diversity(
    string $callingProjectId,
    string $dataProjectId,
    string $topicId,
    string $subscriptionId,
    string $datasetId,
    string $tableId,
    string $sensitiveAttribute,
    array $quasiIdNames
): void {
    // Instantiate a client.
    $dlp = new DlpServiceClient();
    $pubsub = new PubSubClient();
    $topic = $pubsub->topic($topicId);

    // Construct risk analysis config: each quasi-identifier column name
    // becomes a FieldId proto message.
    $quasiIds = array_map(
        function ($id) {
            return (new FieldId())->setName($id);
        },
        $quasiIdNames
    );
    $sensitiveField = (new FieldId())
        ->setName($sensitiveAttribute);
    $statsConfig = (new LDiversityConfig())
        ->setQuasiIds($quasiIds)
        ->setSensitiveAttribute($sensitiveField);
    $privacyMetric = (new PrivacyMetric())
        ->setLDiversityConfig($statsConfig);

    // Construct items to be analyzed
    $bigqueryTable = (new BigQueryTable())
        ->setProjectId($dataProjectId)
        ->setDatasetId($datasetId)
        ->setTableId($tableId);

    // Construct the action to run when job completes: publish to Pub/Sub
    // so this client can learn about completion without polling the DLP API.
    $pubSubAction = (new PublishToPubSub())
        ->setTopic($topic->name());
    $action = (new Action())
        ->setPubSub($pubSubAction);

    // Construct risk analysis job config to run
    $riskJob = (new RiskAnalysisJobConfig())
        ->setPrivacyMetric($privacyMetric)
        ->setSourceTable($bigqueryTable)
        ->setActions([$action]);

    // Listen for job notifications via an existing topic/subscription.
    $subscription = $topic->subscription($subscriptionId);

    // Submit request
    $parent = "projects/$callingProjectId/locations/global";
    $createDlpJobRequest = (new CreateDlpJobRequest())
        ->setParent($parent)
        ->setRiskJob($riskJob);
    $job = $dlp->createDlpJob($createDlpJobRequest);

    // Poll Pub/Sub using exponential backoff until job finishes
    // Consider using an asynchronous execution model such as Cloud Functions
    $attempt = 1;
    $startTime = time();
    do {
        foreach ($subscription->pull() as $message) {
            // Only act on the notification for OUR job; the subscription may
            // carry messages for other DLP jobs publishing to the same topic.
            if (
                isset($message->attributes()['DlpJobName'])
                && $message->attributes()['DlpJobName'] === $job->getName()
            ) {
                $subscription->acknowledge($message);
                // Get the updated job. Loop to avoid race condition with DLP API:
                // the notification can arrive slightly before the job state flips.
                do {
                    $getDlpJobRequest = (new GetDlpJobRequest())
                        ->setName($job->getName());
                    $job = $dlp->getDlpJob($getDlpJobRequest);
                } while ($job->getState() == JobState::RUNNING);
                break 2; // break from parent do while
            }
        }
        print('Waiting for job to complete' . PHP_EOL);
        // Exponential backoff with max delay of 60 seconds
        sleep(min(60, pow(2, ++$attempt)));
    } while (time() - $startTime < 600); // 10 minute timeout

    // Print finding counts
    printf('Job %s status: %s' . PHP_EOL, $job->getName(), JobState::name($job->getState()));
    switch ($job->getState()) {
        case JobState::DONE:
            // Histogram of equivalence classes, bucketed by how many distinct
            // sensitive values each class contains.
            $histBuckets = $job->getRiskDetails()->getLDiversityResult()->getSensitiveValueFrequencyHistogramBuckets();
            foreach ($histBuckets as $bucketIndex => $histBucket) {
                // Print bucket stats
                printf('Bucket %s:' . PHP_EOL, $bucketIndex);
                printf(
                    '  Bucket size range: [%s, %s]' . PHP_EOL,
                    $histBucket->getSensitiveValueFrequencyLowerBound(),
                    $histBucket->getSensitiveValueFrequencyUpperBound()
                );
                // Print bucket values
                foreach ($histBucket->getBucketValues() as $percent => $valueBucket) {
                    printf(
                        '  Class size: %s' . PHP_EOL,
                        $valueBucket->getEquivalenceClassSize()
                    );
                    // Pretty-print quasi-ID values
                    print('  Quasi-ID values:' . PHP_EOL);
                    foreach ($valueBucket->getQuasiIdsValues() as $index => $value) {
                        print('    ' . $value->serializeToJsonString() . PHP_EOL);
                    }
                    // Pretty-print sensitive values
                    $topValues = $valueBucket->getTopSensitiveValues();
                    foreach ($topValues as $topValue) {
                        printf(
                            '  Sensitive value %s occurs %s time(s).' . PHP_EOL,
                            $topValue->getValue()->serializeToJsonString(),
                            $topValue->getCount()
                        );
                    }
                }
            }
            break;
        case JobState::FAILED:
            printf('Job %s had errors:' . PHP_EOL, $job->getName());
            $errors = $job->getErrors();
            foreach ($errors as $error) {
                var_dump($error->getDetails());
            }
            break;
        case JobState::PENDING:
            print('Job has not completed. Consider a longer timeout or an asynchronous execution model' . PHP_EOL);
            break;
        default:
            print('Unexpected job state. Most likely, the job is either running or has not yet started.');
    }
}
 

Python

To learn how to install and use the client library for Sensitive Data Protection, see Sensitive Data Protection client libraries .

To authenticate to Sensitive Data Protection, set up Application Default Credentials. For more information, see Set up authentication for a local development environment .

  import 
  
 concurrent.futures 
 from 
  
 typing 
  
 import 
 List 
 import 
  
 google.cloud.dlp 
 from 
  
 google.cloud.dlp_v2 
  
 import 
 types 
 import 
  
 google.cloud.pubsub 
 def 
  
 l_diversity_analysis 
 ( 
 project 
 : 
 str 
 , 
 table_project_id 
 : 
 str 
 , 
 dataset_id 
 : 
 str 
 , 
 table_id 
 : 
 str 
 , 
 topic_id 
 : 
 str 
 , 
 subscription_id 
 : 
 str 
 , 
 sensitive_attribute 
 : 
 str 
 , 
 quasi_ids 
 : 
 List 
 [ 
 str 
 ], 
 timeout 
 : 
 int 
 = 
 300 
 , 
 ) 
 - 
> None 
 : 
  
 """Uses the Data Loss Prevention API to compute the l-diversity of a 
 column set in a Google BigQuery table. 
 Args: 
 project: The Google Cloud project id to use as a parent resource. 
 table_project_id: The Google Cloud project id where the BigQuery table 
 is stored. 
 dataset_id: The id of the dataset to inspect. 
 table_id: The id of the table to inspect. 
 topic_id: The name of the Pub/Sub topic to notify once the job 
 completes. 
 subscription_id: The name of the Pub/Sub subscription to use when 
 listening for job completion notifications. 
 sensitive_attribute: The column to measure l-diversity relative to. 
 quasi_ids: A set of columns that form a composite key. 
 timeout: The number of seconds to wait for a response from the API. 
 Returns: 
 None; the response from the API is printed to the terminal. 
 """ 
 # Create helper function for unpacking values 
 def 
  
 get_values 
 ( 
 obj 
 : 
 types 
 . 
  Value 
 
 ) 
 - 
> int 
 : 
 return 
 int 
 ( 
 obj 
 . 
 integer_value 
 ) 
 # Instantiate a client. 
 dlp 
 = 
 google 
 . 
 cloud 
 . 
  dlp_v2 
 
 . 
  DlpServiceClient 
 
 () 
 # Convert the project id into a full resource id. 
 topic 
 = 
 google 
 . 
 cloud 
 . 
 pubsub 
 . 
  PublisherClient 
 
 . 
 topic_path 
 ( 
 project 
 , 
 topic_id 
 ) 
 parent 
 = 
 f 
 "projects/ 
 { 
 project 
 } 
 /locations/global" 
 # Location info of the BigQuery table. 
 source_table 
 = 
 { 
 "project_id" 
 : 
 table_project_id 
 , 
 "dataset_id" 
 : 
 dataset_id 
 , 
 "table_id" 
 : 
 table_id 
 , 
 } 
 # Convert quasi id list to Protobuf type 
 def 
  
 map_fields 
 ( 
 field 
 : 
 str 
 ) 
 - 
> dict 
 : 
 return 
 { 
 "name" 
 : 
 field 
 } 
 quasi_ids 
 = 
 map 
 ( 
 map_fields 
 , 
 quasi_ids 
 ) 
 # Tell the API where to send a notification when the job is complete. 
 actions 
 = 
 [{ 
 "pub_sub" 
 : 
 { 
 "topic" 
 : 
 topic 
 }}] 
 # Configure risk analysis job 
 # Give the name of the numeric column to compute risk metrics for 
 risk_job 
 = 
 { 
 "privacy_metric" 
 : 
 { 
 "l_diversity_config" 
 : 
 { 
 "quasi_ids" 
 : 
 quasi_ids 
 , 
 "sensitive_attribute" 
 : 
 { 
 "name" 
 : 
 sensitive_attribute 
 }, 
 } 
 }, 
 "source_table" 
 : 
 source_table 
 , 
 "actions" 
 : 
 actions 
 , 
 } 
 # Call API to start risk analysis job 
 operation 
 = 
 dlp 
 . 
 create_dlp_job 
 ( 
 request 
 = 
 { 
 "parent" 
 : 
 parent 
 , 
 "risk_job" 
 : 
 risk_job 
 }) 
 def 
  
 callback 
 ( 
 message 
 : 
 google 
 . 
 cloud 
 . 
 pubsub_v1 
 . 
 subscriber 
 . 
 message 
 . 
  Message 
 
 ) 
 - 
> None 
 : 
 if 
 message 
 . 
  attributes 
 
 [ 
 "DlpJobName" 
 ] 
 == 
 operation 
 . 
 name 
 : 
 # This is the message we're looking for, so acknowledge it. 
 message 
 . 
  ack 
 
 () 
 # Now that the job is done, fetch the results and print them. 
 job 
 = 
 dlp 
 . 
 get_dlp_job 
 ( 
 request 
 = 
 { 
 "name" 
 : 
 operation 
 . 
 name 
 }) 
 print 
 ( 
 f 
 "Job name: 
 { 
 job 
 . 
 name 
 } 
 " 
 ) 
 histogram_buckets 
 = 
 ( 
 job 
 . 
 risk_details 
 . 
 l_diversity_result 
 . 
 sensitive_value_frequency_histogram_buckets 
 # noqa: E501 
 ) 
 # Print bucket stats 
 for 
 i 
 , 
 bucket 
 in 
 enumerate 
 ( 
 histogram_buckets 
 ): 
 print 
 ( 
 f 
 "Bucket 
 { 
 i 
 } 
 :" 
 ) 
 print 
 ( 
 "   Bucket size range: [ 
 {} 
 , 
 {} 
 ]" 
 . 
 format 
 ( 
 bucket 
 . 
 sensitive_value_frequency_lower_bound 
 , 
 bucket 
 . 
 sensitive_value_frequency_upper_bound 
 , 
 ) 
 ) 
 for 
 value_bucket 
 in 
 bucket 
 . 
 bucket_values 
 : 
 print 
 ( 
 "   Quasi-ID values: 
 {} 
 " 
 . 
 format 
 ( 
 map 
 ( 
 get_values 
 , 
 value_bucket 
 . 
 quasi_ids_values 
 ) 
 ) 
 ) 
 print 
 ( 
 f 
 "   Class size: 
 { 
 value_bucket 
 . 
 equivalence_class_size 
 } 
 " 
 ) 
 for 
 value 
 in 
 value_bucket 
 . 
 top_sensitive_values 
 : 
 print 
 ( 
 "   Sensitive value 
 {} 
 occurs 
 {} 
 time(s)" 
 . 
 format 
 ( 
 value 
 . 
 value 
 , 
 value 
 . 
 count 
 ) 
 ) 
 subscription 
 . 
 set_result 
 ( 
 None 
 ) 
 else 
 : 
 # This is not the message we're looking for. 
 message 
 . 
  drop 
 
 () 
 # Create a Pub/Sub client and find the subscription. The subscription is 
 # expected to already be listening to the topic. 
 subscriber 
 = 
 google 
 . 
 cloud 
 . 
 pubsub 
 . 
  SubscriberClient 
 
 () 
 subscription_path 
 = 
 subscriber 
 . 
 subscription_path 
 ( 
 project 
 , 
 subscription_id 
 ) 
 subscription 
 = 
  subscribe 
 
r . 
  subscribe 
 
 ( 
 subscription_path 
 , 
 callback 
 ) 
 try 
 : 
 subscription 
 . 
 result 
 ( 
 timeout 
 = 
 timeout 
 ) 
 except 
 concurrent 
 . 
 futures 
 . 
 TimeoutError 
 : 
 print 
 ( 
 "No event received before the timeout. Please verify that the " 
 "subscription provided is subscribed to the topic provided." 
 ) 
 subscription 
 . 
  close 
 
 () 
 

What's next

To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser .

Create a Mobile Website
View Site in Mobile | Classic
Share by: