How to get the Cloud Storage soft delete storage cost
Code sample
Python
For more information, see the Cloud Storage Python API reference documentation.
To authenticate to Cloud Storage, set up Application Default Credentials. For more information, see Set up authentication for client libraries.
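For local development, one common way to set up Application Default Credentials is with the gcloud CLI (shown as one option; a production environment would typically use an attached service account instead):

gcloud auth application-default login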
from __future__ import annotations

import argparse
import json

import google.cloud.monitoring_v3 as monitoring_client


def get_relative_cost(storage_class: str) -> float:
    """Retrieves the relative cost for a given storage class.

    Args:
        storage_class: The storage class (e.g., 'STANDARD', 'NEARLINE').

    Returns:
        The price per GB from https://cloud.google.com/storage/pricing,
        divided by the price per GB of the STANDARD storage class.
    """
    relative_cost = {
        "STANDARD": 0.023 / 0.023,
        "NEARLINE": 0.013 / 0.023,
        "COLDLINE": 0.007 / 0.023,
        "ARCHIVE": 0.0025 / 0.023,
    }

    return relative_cost.get(storage_class, 1.0)


def get_soft_delete_cost(
    project_name: str,
    soft_delete_window: float,
    agg_days: int,
    lookback_days: int,
) -> dict[str, list[dict[str, float]]]:
    """Calculates soft delete costs for buckets in a Google Cloud project.

    Args:
        project_name: The name of the Google Cloud project.
        soft_delete_window: The time window in seconds for considering
            soft-deleted objects (default is 7 days).
        agg_days: Aggregate results over this many days; defaults to 30.
        lookback_days: Look back up to this many days; defaults to 360.

    Returns:
        A dictionary with bucket names as keys and cost data for each bucket,
        broken down by storage class.
    """
    query_client = monitoring_client.QueryServiceClient()

    # Step 1: Get storage class ratios for each bucket.
    storage_ratios_by_bucket = get_storage_class_ratio(
        project_name, query_client, agg_days, lookback_days
    )

    # Step 2: Fetch soft-deleted bytes and calculate costs using the Monitoring API.
    soft_deleted_costs = calculate_soft_delete_costs(
        project_name,
        query_client,
        soft_delete_window,
        storage_ratios_by_bucket,
        agg_days,
        lookback_days,
    )

    return soft_deleted_costs


def calculate_soft_delete_costs(
    project_name: str,
    query_client: monitoring_client.QueryServiceClient,
    soft_delete_window: float,
    storage_ratios_by_bucket: dict[str, float],
    agg_days: int,
    lookback_days: int,
) -> dict[str, list[dict[str, float]]]:
    """Calculates the relative cost of enabling soft delete for each bucket in
    a project over a given time frame in seconds.

    Args:
        project_name: The name of the Google Cloud project.
        query_client: A Monitoring API query client.
        soft_delete_window: The time window in seconds for considering
            soft-deleted objects (default is 7 days).
        storage_ratios_by_bucket: A dictionary of storage class ratios per bucket.
        agg_days: Aggregate results over this many days; defaults to 30.
        lookback_days: Look back up to this many days; defaults to 360.

    Returns:
        A dictionary with bucket names as keys and a list of cost data
        dictionaries for each bucket, broken down by storage class.
    """
    soft_deleted_bytes_time = query_client.query_time_series(
        monitoring_client.QueryTimeSeriesRequest(
            name=f"projects/{project_name}",
            query=f"""
            {{  # Fetch 1: Soft-deleted (byte-seconds)
                fetch gcs_bucket :: storage.googleapis.com/storage/v2/deleted_bytes
                | value val(0) * {soft_delete_window}'s'  # Multiply by soft delete window
                | group_by [resource.bucket_name, metric.storage_class], window(), .sum;

                # Fetch 2: Total byte-seconds (active objects)
                fetch gcs_bucket :: storage.googleapis.com/storage/v2/total_byte_seconds
                | filter metric.type != 'soft-deleted-object'
                | group_by [resource.bucket_name, metric.storage_class], window(1d), .mean  # Daily average
                | group_by [resource.bucket_name, metric.storage_class], window(), .sum  # Total over window
            }}  # End query definition
            | every {agg_days}d  # Aggregate over larger time intervals
            | within {lookback_days}d  # Limit data range for analysis
            | ratio  # Calculate ratio (soft-deleted byte-seconds / total byte-seconds)
            """,
        )
    )

    buckets: dict[str, list[dict[str, float]]] = {}
    missing_distribution_storage_class = []
    for data_point in soft_deleted_bytes_time.time_series_data:
        bucket_name = data_point.label_values[0].string_value
        storage_class = data_point.label_values[1].string_value
        # To include location-based cost analysis:
        # 1. Uncomment the line below:
        # location = data_point.label_values[2].string_value
        # 2. Update how you calculate 'relative_storage_class_cost' to factor in location.
        soft_delete_ratio = data_point.point_data[0].values[0].double_value
        distribution_storage_class = bucket_name + " - " + storage_class
        storage_class_ratio = storage_ratios_by_bucket.get(distribution_storage_class)
        if storage_class_ratio is None:
            missing_distribution_storage_class.append(distribution_storage_class)
        buckets.setdefault(bucket_name, []).append({
            # Include storage class and location data for additional plotting dimensions.
            # "storage_class": storage_class,
            # "location": location,
            "soft_delete_ratio": soft_delete_ratio,
            "storage_class_ratio": storage_class_ratio,
            "relative_storage_class_cost": get_relative_cost(storage_class),
        })

    if missing_distribution_storage_class:
        print(
            "Missing storage class for the following buckets:",
            missing_distribution_storage_class,
        )
        raise ValueError("Cannot proceed with missing storage class ratios.")

    return buckets


def get_storage_class_ratio(
    project_name: str,
    query_client: monitoring_client.QueryServiceClient,
    agg_days: int,
    lookback_days: int,
) -> dict[str, float]:
    """Calculates storage class ratios for each bucket in a project.

    This information helps determine the relative cost contribution of each
    storage class to the overall soft-delete cost.

    Args:
        project_name: The Google Cloud project name.
        query_client: Google Cloud Monitoring's QueryServiceClient.
        agg_days: Aggregate results over this many days; defaults to 30.
        lookback_days: Look back up to this many days; defaults to 360.

    Returns:
        A dictionary mapping "<bucket> - <storage class>" keys to the ratio of
        that storage class within the bucket.
    """
    request = monitoring_client.QueryTimeSeriesRequest(
        name=f"projects/{project_name}",
        query=f"""
        {{
            # Fetch total byte-seconds for each bucket and storage class.
            fetch gcs_bucket :: storage.googleapis.com/storage/v2/total_byte_seconds
            | group_by [resource.bucket_name, metric.storage_class], window(), .sum;
            # Fetch total byte-seconds for each bucket (regardless of class).
            fetch gcs_bucket :: storage.googleapis.com/storage/v2/total_byte_seconds
            | group_by [resource.bucket_name], window(), .sum
        }}
        | ratio  # Calculate ratios of storage class size to total size
        | every {agg_days}d
        | within {lookback_days}d
        """,
    )

    storage_class_ratio = query_client.query_time_series(request)

    storage_ratios_by_bucket = {}
    for time_series in storage_class_ratio.time_series_data:
        bucket_name = time_series.label_values[0].string_value
        storage_class = time_series.label_values[1].string_value
        ratio = time_series.point_data[0].values[0].double_value

        # Create a descriptive key for the dictionary.
        key = f"{bucket_name} - {storage_class}"
        storage_ratios_by_bucket[key] = ratio

    return storage_ratios_by_bucket


def soft_delete_relative_cost_analyzer(
    project_name: str,
    cost_threshold: float = 0.0,
    soft_delete_window: float = 604800,
    agg_days: int = 30,
    lookback_days: int = 360,
    list_buckets: bool = False,
) -> str:
    """Identifies buckets exceeding the relative cost threshold for enabling soft delete.

    Args:
        project_name: The Google Cloud project name.
        cost_threshold: Threshold above which to consider removing soft delete.
        soft_delete_window: Time window for calculating soft-delete costs (in
            seconds).
        agg_days: Aggregate results over this time period (in days).
        lookback_days: Look back up to this many days.
        list_buckets: Return a space-separated string of bucket names (True) or
            JSON (False, default).

    Returns:
        JSON-formatted results of buckets exceeding the threshold and their
        costs, *or* a space-separated string of bucket names.
    """
    buckets: dict[str, float] = {}
    for bucket_name, storage_sources in get_soft_delete_cost(
        project_name, soft_delete_window, agg_days, lookback_days
    ).items():
        bucket_cost = 0.0
        for storage_source in storage_sources:
            bucket_cost += (
                storage_source["soft_delete_ratio"]
                * storage_source["storage_class_ratio"]
                * storage_source["relative_storage_class_cost"]
            )
        if bucket_cost > cost_threshold:
            buckets[bucket_name] = round(bucket_cost, 4)

    if list_buckets:
        return " ".join(buckets.keys())  # Space-separated bucket names
    return json.dumps(buckets, indent=2)  # JSON output


def soft_delete_relative_cost_analyzer_main() -> None:
    # Sample run: python storage_soft_delete_relative_cost_analyzer.py <project-name>
    parser = argparse.ArgumentParser(
        description="Analyze and manage Google Cloud Storage soft-delete costs."
    )
    parser.add_argument(
        "project_name", help="The name of the Google Cloud project to analyze."
    )
    parser.add_argument(
        "--cost_threshold",
        type=float,
        default=0.0,
        help="Relative cost threshold.",
    )
    parser.add_argument(
        "--soft_delete_window",
        type=float,
        default=604800.0,
        help="Time window (in seconds) for considering soft-deleted objects.",
    )
    parser.add_argument(
        "--agg_days",
        type=int,
        default=30,
        help="Time window (in days) for aggregating results; defaults to 30.",
    )
    parser.add_argument(
        "--lookback_days",
        type=int,
        default=360,
        help="Time window (in days) for how far back to analyze; defaults to 360.",
    )
    parser.add_argument(
        "--list",
        # A boolean flag. (argparse's type=bool would treat any non-empty
        # string, including "False", as True.)
        action="store_true",
        help="Return the list of bucket names separated by spaces.",
    )

    args = parser.parse_args()

    response = soft_delete_relative_cost_analyzer(
        args.project_name,
        args.cost_threshold,
        args.soft_delete_window,
        args.agg_days,
        args.lookback_days,
        args.list,
    )
    if not args.list:
        print(
            "To remove the soft-delete policy from the listed buckets, run:\n"
            "python storage_soft_delete_relative_cost_analyzer.py"
            " [your-project-name] --[OTHER_OPTIONS] --list > list_of_buckets.txt\n"
            "cat list_of_buckets.txt | gcloud storage buckets update -I "
            "--clear-soft-delete",
            response,
        )
        return

    print(response)


if __name__ == "__main__":
    soft_delete_relative_cost_analyzer_main()
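As a usage sketch, assuming the sample is saved as storage_soft_delete_relative_cost_analyzer.py (the project ID my-project and the 0.15 threshold are illustrative placeholders), you can flag buckets whose estimated relative soft-delete cost exceeds the threshold and pipe the result to gcloud, mirroring the commands the script itself prints:

python storage_soft_delete_relative_cost_analyzer.py my-project --cost_threshold 0.15 --list > list_of_buckets.txt
cat list_of_buckets.txt | gcloud storage buckets update -I --clear-soft-delete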
What's next
To search and filter code samples for other Google Cloud products, see the Google Cloud sample browser.