Skip to content

Commit 29db090

Browse files
authored
docs(samples): Updating readme for soft delete cost analyzer script (GoogleCloudPlatform#11222)
* docs(samples): Updating readme for soft delete cost analyzer script * Updating the way of calculations Reviewed by team: cl/608673793 * Updating the comments * reformatting the comments
1 parent ff4c1d5 commit 29db090

File tree

2 files changed

+53
-40
lines changed

2 files changed

+53
-40
lines changed

storage/cost-analysis/README.md

Lines changed: 17 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -27,26 +27,31 @@ NOTE: Due to the specific functionality related to Google Cloud APIs, this guide
2727
3. A Python environment (https://cloud.google.com/python/setup)
2828

2929
**Command-Line Arguments**
30-
* `project_name`_ (Required): Specifies your GCP project name.
31-
* `--cost_threshold`_ (Optional, default=0): Sets a relative cost threshold.
32-
* `--soft_delete_window`_ (Optional, default= 604800 (i.e. 7 days)): Time window (in seconds) for considering soft-deleted objects..
33-
* `--agg_days`_ (Optional, default=30): The period over which to combine and aggregate results.
34-
* `--lookback_days`_ (Optional, default=360): Time window (in days) for considering the how old the bucket to be.
35-
* `--list`_ (Optional): Produces a simple list of bucket names.
30+
* `project_name` - (**Required**): Specifies your GCP project name.
31+
* `--cost_threshold` - (Optional, default=0): Sets a relative cost threshold.
32+
* `--soft_delete_window` - (Optional, default=604800 (i.e. 7 days)): Time window (in seconds) for considering soft-deleted objects.
33+
* `--agg_days` - (Optional, default=30): The period over which to combine and aggregate results.
34+
* `--lookback_days` - (Optional, default=360): Time window (in days) for considering how old the bucket is.
35+
* `--list` - (Optional, default=False): Produces a simple list of bucket names.
3636

37-
Note: In this sample, cost_threshold 0.15 would spotlight buckets where enabling soft delete might increase costs by over 15%.
37+
Note: In this sample, setting cost_threshold to 0.15 would spotlight buckets where enabling soft delete might increase costs by over 15%.
3838

3939
``` code-block:: bash
40-
$ python storage_soft_delete_relative_cost_analyzer.py my-project-name
40+
$ python storage_soft_delete_relative_cost_analyzer.py [your-project-name]
4141
```
4242

43-
**Important Note:** To disable soft-delete for buckets flagged by the script, follow these steps:
43+
To disable soft-delete for buckets flagged by the script, follow these steps:
4444

4545
```code-block::bash
46-
# 1. Run the analyzer to generate a list of buckets exceeding your cost threshold:
46+
# 1. Authenticate (if needed): If you're not already authenticated or prefer a specific account, run:
47+
gcloud auth application-default login
48+
49+
# 2. Run the analyzer to generate a list of buckets exceeding your cost threshold:
4750
python storage_soft_delete_relative_cost_analyzer.py [your-project-name] --[OTHER_OPTIONS] --list=True > list_of_buckets.txt
4851
49-
# 2. Update the buckets using the generated list:
52+
# 3. Update the buckets using the generated list:
5053
cat list_of_buckets.txt | gcloud storage buckets update -I --clear-soft-delete
5154
52-
```
55+
```
56+
57+
**Important Note:** <span style="color: red;">Disabling soft-delete for flagged buckets means that any files deleted from them are permanently deleted. These files cannot be restored, even if a soft-delete policy is later re-enabled.</span>

storage/cost-analysis/storage_soft_delete_relative_cost_analyzer.py

Lines changed: 36 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -14,16 +14,16 @@
1414
# See the License for the specific language governing permissions and
1515
# limitations under the License.
1616

17-
"""
18-
Identifies buckets with relative increase in cost on enabling the soft-delete.
17+
"""Identifies buckets with relative increase in cost on enabling the soft-delete.
1918
2019
The relative increase in cost of using soft delete is calculated by combining
21-
the storage/v2/deleted_bytes metric with the existing storage/v2/total_byte_seconds
20+
the storage/v2/deleted_bytes metric with the existing
21+
storage/v2/total_byte_seconds
2222
metric.
2323
24-
Relative cost of each bucket = ('soft delete retention duration'
25-
× 'deleted bytes' / 'total bytes seconds' )
26-
x 'cost of storing in storage class'
24+
Relative cost of each bucket = ('soft delete retention duration'
25+
× 'deleted bytes' / 'total bytes seconds' )
26+
x 'cost of storing in storage class'
2727
x 'ratio of storage class'.
2828
"""
2929

@@ -49,7 +49,7 @@ def get_relative_cost(storage_class: str) -> float:
4949
"STANDARD": 0.023 / 0.023,
5050
"NEARLINE": 0.013 / 0.023,
5151
"COLDLINE": 0.007 / 0.023,
52-
"ARCHIVE": 0.0025 / 0.023
52+
"ARCHIVE": 0.0025 / 0.023,
5353
}
5454

5555
return relative_cost.get(storage_class, 1.0)
@@ -59,7 +59,7 @@ def get_soft_delete_cost(
5959
project_name: str,
6060
soft_delete_window: int,
6161
agg_days: int,
62-
lookback_days: int
62+
lookback_days: int,
6363
) -> Dict[str, List[Dict[str, float]]]:
6464
"""Calculates soft delete costs for buckets in a Google Cloud project.
6565
@@ -124,19 +124,25 @@ def calculate_soft_delete_costs(
124124
monitoring_client.QueryTimeSeriesRequest(
125125
name=f"projects/{project_name}",
126126
query=f"""
127-
{{
128-
fetch gcs_bucket :: storage.googleapis.com/storage/v2/deleted_bytes
129-
| group_by [resource.bucket_name, metric.storage_class, resource.location], window(), .sum;
130-
fetch gcs_bucket :: storage.googleapis.com/storage/v2/total_byte_seconds
131-
| group_by [resource.bucket_name, metric.storage_class, resource.location], window(), .sum
132-
}}
133-
| ratio # Calculate ratios of deleted btyes to total bytes seconds
134-
| value val(0) * {soft_delete_window}\'s\'
135-
| every {agg_days}d
136-
| within {lookback_days}d
127+
{{ # Fetch 1: Soft-deleted (bytes seconds)
128+
fetch gcs_bucket :: storage.googleapis.com/storage/v2/deleted_bytes
129+
| value val(0) * {soft_delete_window}\'s\' # Multiply by soft delete window
130+
| group_by [resource.bucket_name, metric.storage_class], window(), .sum;
131+
132+
# Fetch 2: Total byte-seconds (active objects)
133+
fetch gcs_bucket :: storage.googleapis.com/storage/v2/total_byte_seconds
134+
| filter metric.type != 'soft-deleted-object'
135+
| group_by [resource.bucket_name, metric.storage_class], window(1d), .mean # Daily average
136+
| group_by [resource.bucket_name, metric.storage_class], window(), .sum # Total over window
137+
138+
}} # End query definition
139+
| every {agg_days}d # Aggregate over larger time intervals
140+
| within {lookback_days}d # Limit data range for analysis
141+
| ratio # Calculate ratio (soft-deleted (bytes seconds)/ total (bytes seconds))
137142
""",
138143
)
139144
)
145+
140146
buckets: Dict[str, List[Dict[str, float]]] = {}
141147
missing_distribution_storage_class = []
142148
for data_point in soft_deleted_bytes_time.time_series_data:
@@ -149,7 +155,8 @@ def calculate_soft_delete_costs(
149155
soft_delete_ratio = data_point.point_data[0].values[0].double_value
150156
distribution_storage_class = bucket_name + " - " + storage_class
151157
storage_class_ratio = storage_ratios_by_bucket.get(
152-
distribution_storage_class)
158+
distribution_storage_class
159+
)
153160
if storage_class_ratio is None:
154161
missing_distribution_storage_class.append(
155162
distribution_storage_class)
@@ -159,12 +166,14 @@ def calculate_soft_delete_costs(
159166
# 'location': location,
160167
"soft_delete_ratio": soft_delete_ratio,
161168
"storage_class_ratio": storage_class_ratio,
162-
"relative_storage_class_cost": get_relative_cost(storage_class)
169+
"relative_storage_class_cost": get_relative_cost(storage_class),
163170
})
164171

165172
if missing_distribution_storage_class:
166-
print("Missing storage class for following buckets:",
167-
missing_distribution_storage_class)
173+
print(
174+
"Missing storage class for following buckets:",
175+
missing_distribution_storage_class,
176+
)
168177
raise ValueError("Cannot proceed with missing storage class ratios.")
169178

170179
return buckets
@@ -321,15 +330,14 @@ def soft_delete_relative_cost_analyzer_main() -> None:
321330
args.lookback_days,
322331
args.list,
323332
)
324-
if (not args.list):
333+
if not args.list:
325334
print(
326335
"To remove soft-delete policy from the listed buckets run:\n"
327336
# Capture output
328-
"python storage_soft_delete_relative_cost_analyzer.py [your-project-name] --[OTHER_OPTIONS] --list >"
329-
" list_of_buckets.txt\n"
330-
"cat list_of_buckets.txt | gcloud storage buckets update -I"
331-
" --clear-soft-delete\n",
332-
"\nThe buckets with approximate costs for soft delete:\n",
337+
"python storage_soft_delete_relative_cost_analyzer.py"
338+
" [your-project-name] --[OTHER_OPTIONS] --list > list_of_buckets.txt \n"
339+
"cat list_of_buckets.txt | gcloud storage buckets update -I "
340+
"--clear-soft-delete",
333341
response,
334342
)
335343
return

0 commit comments

Comments
 (0)