1414# See the License for the specific language governing permissions and
1515# limitations under the License.
1616
17- """
18- Identifies buckets with relative increase in cost on enabling the soft-delete.
17+ """Identifies buckets with a relative cost increase when soft delete is enabled.
1918
2019The relative increase in cost of using soft delete is calculated by combining
21- the storage/v2/deleted_bytes metric with the existing storage/v2/total_byte_seconds
20+ the storage/v2/deleted_bytes metric with the existing
21+ storage/v2/total_byte_seconds
2222metric.
2323
24- Relative cost of each bucket = ('soft delete retention duration'
25- × 'deleted bytes' / 'total bytes seconds' )
26- x 'cost of storing in storage class'
24+ Relative cost of each bucket = ('soft delete retention duration'
25+ × 'deleted bytes' / 'total byte seconds')
26+ × 'cost of storing in storage class'
2727 × 'ratio of storage class'.
2828"""
2929
@@ -49,7 +49,7 @@ def get_relative_cost(storage_class: str) -> float:
4949 "STANDARD" : 0.023 / 0.023 ,
5050 "NEARLINE" : 0.013 / 0.023 ,
5151 "COLDLINE" : 0.007 / 0.023 ,
52- "ARCHIVE" : 0.0025 / 0.023
52+ "ARCHIVE" : 0.0025 / 0.023 ,
5353 }
5454
5555 return relative_cost .get (storage_class , 1.0 )
@@ -59,7 +59,7 @@ def get_soft_delete_cost(
5959 project_name : str ,
6060 soft_delete_window : int ,
6161 agg_days : int ,
62- lookback_days : int
62+ lookback_days : int ,
6363) -> Dict [str , List [Dict [str , float ]]]:
6464 """Calculates soft delete costs for buckets in a Google Cloud project.
6565
@@ -124,19 +124,25 @@ def calculate_soft_delete_costs(
124124 monitoring_client .QueryTimeSeriesRequest (
125125 name = f"projects/{ project_name } " ,
126126 query = f"""
127- {{
128- fetch gcs_bucket :: storage.googleapis.com/storage/v2/deleted_bytes
129- | group_by [resource.bucket_name, metric.storage_class, resource.location], window(), .sum;
130- fetch gcs_bucket :: storage.googleapis.com/storage/v2/total_byte_seconds
131- | group_by [resource.bucket_name, metric.storage_class, resource.location], window(), .sum
132- }}
133- | ratio # Calculate ratios of deleted btyes to total bytes seconds
134- | value val(0) * { soft_delete_window } \' s\'
135- | every { agg_days } d
136- | within { lookback_days } d
127+ {{ # Fetch 1: Soft-deleted (bytes seconds)
128+ fetch gcs_bucket :: storage.googleapis.com/storage/v2/deleted_bytes
129+ | value val(0) * { soft_delete_window } \' s\' # Multiply by soft delete window
130+ | group_by [resource.bucket_name, metric.storage_class], window(), .sum;
131+
132+ # Fetch 2: Total byte-seconds (active objects)
133+ fetch gcs_bucket :: storage.googleapis.com/storage/v2/total_byte_seconds
134+ | filter metric.type != 'soft-deleted-object'
135+ | group_by [resource.bucket_name, metric.storage_class], window(1d), .mean # Daily average
136+ | group_by [resource.bucket_name, metric.storage_class], window(), .sum # Total over window
137+
138+ }} # End query definition
139+ | every { agg_days } d # Aggregate over larger time intervals
140+ | within { lookback_days } d # Limit data range for analysis
141+ | ratio # Calculate ratio (soft-deleted (bytes seconds)/ total (bytes seconds))
137142 """ ,
138143 )
139144 )
145+
140146 buckets : Dict [str , List [Dict [str , float ]]] = {}
141147 missing_distribution_storage_class = []
142148 for data_point in soft_deleted_bytes_time .time_series_data :
@@ -149,7 +155,8 @@ def calculate_soft_delete_costs(
149155 soft_delete_ratio = data_point .point_data [0 ].values [0 ].double_value
150156 distribution_storage_class = bucket_name + " - " + storage_class
151157 storage_class_ratio = storage_ratios_by_bucket .get (
152- distribution_storage_class )
158+ distribution_storage_class
159+ )
153160 if storage_class_ratio is None :
154161 missing_distribution_storage_class .append (
155162 distribution_storage_class )
@@ -159,12 +166,14 @@ def calculate_soft_delete_costs(
159166 # 'location': location,
160167 "soft_delete_ratio" : soft_delete_ratio ,
161168 "storage_class_ratio" : storage_class_ratio ,
162- "relative_storage_class_cost" : get_relative_cost (storage_class )
169+ "relative_storage_class_cost" : get_relative_cost (storage_class ),
163170 })
164171
165172 if missing_distribution_storage_class :
166- print ("Missing storage class for following buckets:" ,
167- missing_distribution_storage_class )
173+ print (
174+ "Missing storage class for following buckets:" ,
175+ missing_distribution_storage_class ,
176+ )
168177 raise ValueError ("Cannot proceed with missing storage class ratios." )
169178
170179 return buckets
@@ -321,15 +330,14 @@ def soft_delete_relative_cost_analyzer_main() -> None:
321330 args .lookback_days ,
322331 args .list ,
323332 )
324- if ( not args .list ) :
333+ if not args .list :
325334 print (
326335 "To remove soft-delete policy from the listed buckets run:\n "
327336 # Capture output
328- "python storage_soft_delete_relative_cost_analyzer.py [your-project-name] --[OTHER_OPTIONS] --list >"
329- " list_of_buckets.txt\n "
330- "cat list_of_buckets.txt | gcloud storage buckets update -I"
331- " --clear-soft-delete\n " ,
332- "\n The buckets with approximate costs for soft delete:\n " ,
337+ "python storage_soft_delete_relative_cost_analyzer.py"
338+ " [your-project-name] --[OTHER_OPTIONS] --list > list_of_buckets.txt \n "
339+ "cat list_of_buckets.txt | gcloud storage buckets update -I "
340+ "--clear-soft-delete" ,
333341 response ,
334342 )
335343 return
0 commit comments