From 0389d879aea9b131f9e04140bf175044e1ba1265 Mon Sep 17 00:00:00 2001
From: Diogo Simoes
Date: Tue, 11 Jul 2023 09:28:30 +0100
Subject: [PATCH 01/52] fix: 00 after POW typos

---
 audit_log/query_counts.sql                        | 2 +-
 audit_log/top_billed_queries.sql                  | 2 +-
 audit_log/top_cost_user_by_region_and_project.sql | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/audit_log/query_counts.sql b/audit_log/query_counts.sql
index 92e0711..b8796a1 100644
--- a/audit_log/query_counts.sql
+++ b/audit_log/query_counts.sql
@@ -72,7 +72,7 @@ WITH
   SELECT
     query,
     queryCount,
-    ROUND(SAFE_DIVIDE(totalBytesBilled, POW(1024, 3)00) * 5, 2) AS onDemandCost,
+    ROUND(SAFE_DIVIDE(totalBytesBilled, POW(1024, 3)) * 5, 2) AS onDemandCost,
     ROUND(COALESCE(totalBytesBilled, 0), 2) AS totalBytesBilled,
     ROUND(COALESCE(totalBytesBilled, 0) / POW(1024, 2), 2) AS totalMegabytesBilled,
     ROUND(COALESCE(totalBytesBilled, 0) / POW(1024, 3), 2) AS totalGigabytesBilled,
diff --git a/audit_log/top_billed_queries.sql b/audit_log/top_billed_queries.sql
index ffe7215..2014cc4 100644
--- a/audit_log/top_billed_queries.sql
+++ b/audit_log/top_billed_queries.sql
@@ -12,7 +12,7 @@ SELECT
   protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.startTime,
   protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.endTime,
   ROUND(SAFE_DIVIDE(COALESCE(protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.totalBilledBytes,
-        0), POW(1024, 3)00) * 5, 2) AS onDemandCost,
+        0), POW(1024, 3)) * 5, 2) AS onDemandCost,
   ROUND(COALESCE(protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.totalBilledBytes,
       0), 2) AS totalBytesBilled,
   ROUND(COALESCE(protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.totalBilledBytes,
diff --git a/audit_log/top_cost_user_by_region_and_project.sql b/audit_log/top_cost_user_by_region_and_project.sql
index 58933f2..b398390 100644
--- a/audit_log/top_cost_user_by_region_and_project.sql
+++ b/audit_log/top_cost_user_by_region_and_project.sql
@@ -11,7 +11,7 @@ WITH src AS (
     protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.startTime,
     protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.endTime,
     protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.totalBilledBytes,
-    ROUND(SAFE_DIVIDE(protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.totalBilledBytes, POW(1024, 3)00) * 5, 2) AS onDemandCost,
+    ROUND(SAFE_DIVIDE(protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.totalBilledBytes, POW(1024, 3)) * 5, 2) AS onDemandCost,
     protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.totalSlotMs,
     TIMESTAMP_DIFF(protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.endTime,
       protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobStatistics.startTime,

From c5611b88ca1c548120b8ec1c3bdc58ac6591cb95 Mon Sep 17 00:00:00 2001
From: Sayle Matthews
Date: Mon, 31 Jul 2023 17:25:50 -0500
Subject: [PATCH 02/52] Updated the bad byte count.
---
 information_schema/longest_running_queries.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/information_schema/longest_running_queries.sql b/information_schema/longest_running_queries.sql
index 0932c47..d6a1813 100644
--- a/information_schema/longest_running_queries.sql
+++ b/information_schema/longest_running_queries.sql
@@ -58,4 +58,4 @@ FROM
   queriesDeduplicated
 ORDER BY
   runningTimeInSeconds DESC;
-END
\ No newline at end of file
+END

From b4365d8fb677b80874bb763462b46bbb3fe128d6 Mon Sep 17 00:00:00 2001
From: Glen Park
Date: Mon, 28 Aug 2023 12:20:58 -0700
Subject: [PATCH 03/52] Initial commit for public consumption

---
 .../bqe_slot_utilization_by_minute.sql        | 108 ++++++++++++++++++
 1 file changed, 108 insertions(+)
 create mode 100644 information_schema/bqe_slot_utilization_by_minute.sql

diff --git a/information_schema/bqe_slot_utilization_by_minute.sql b/information_schema/bqe_slot_utilization_by_minute.sql
new file mode 100644
index 0000000..e9c644a
--- /dev/null
+++ b/information_schema/bqe_slot_utilization_by_minute.sql
@@ -0,0 +1,108 @@
+/*
+This query pulls job data and calculates job metrics to the
+minute grain (i.e. for each minute X number of jobs running and
+Y total approximate slots utilized). This rowset is then joined
+to reservations timeline data to calculate estimated utilization
+of available slots per minute on a designated BigQuery Editions
+reservation (not on demand).
+*/
+
+declare interval_in_days int64;
+declare res_name string;
+set interval_in_days = 7;
+set res_name = '';
+
+--original query used against sandbox projects
+with base_data as
+(
+  SELECT
+    job_id,
+    start_time,
+    end_time,
+    --Calculate the job run time in minutes (rounded up)
+    CEIL(TIMESTAMP_DIFF(end_time,start_time,MILLISECOND)/1000/60) AS run_duration_minutes,
+    total_slot_ms,
+    total_bytes_processed,
+    total_bytes_billed,
+    --Calculate total approximate slot count for the job
+    ROUND(SAFE_DIVIDE(total_slot_ms, TIMESTAMP_DIFF(end_time, start_time, MILLISECOND)), 2) AS total_slot_count,
+    --Calculate slot milliseconds per minute of job run time
+    total_slot_ms/CEIL(TIMESTAMP_DIFF(end_time,start_time,MILLISECOND)/1000/60) AS slot_ms_per_minute,
+    --Calculate approximate slot count per minute of job run time
+    ROUND(SAFE_DIVIDE((total_slot_ms/CEIL(TIMESTAMP_DIFF(end_time,start_time,MILLISECOND)/1000/60)), TIMESTAMP_DIFF(end_time, start_time, MILLISECOND)), 2) AS total_slot_count_per_minute,
+  FROM
+    .``.INFORMATION_SCHEMA.JOBS
+  WHERE
+    total_slot_ms IS NOT NULL
+    and creation_time > timestamp_sub(current_timestamp(), INTERVAL interval_in_days DAY)
+),
+--Isolate multiple minute jobs for the next step
+multiple_min_data as
+(
+  select * from base_data where run_duration_minutes > 1
+),
+--For jobs that ran more than 1 minute, generate a duplicate row for each minute
+generate_single_minute_data as
+(
+  select
+    job_id,
+    start_time,
+    end_time,
+    slot_ms_per_minute,
+    total_slot_count_per_minute,
+    --Generate a timestamp for each minute and truncate the seconds
+    timestamp_trunc(minute_start, minute) as minute_timestamp
+  from multiple_min_data
+  cross join unnest(generate_timestamp_array(start_time, end_time, interval 1 minute)) as minute_start
+),
+--Combine all single minute rows
+combined_detail_data as
+(
+  select
+    job_id,
+    start_time,
+    end_time,
+    slot_ms_per_minute,
+    total_slot_count_per_minute,
+    minute_timestamp
+  from generate_single_minute_data
+  union all
+  select
+    job_id,
+    start_time,
+    end_time,
+    slot_ms_per_minute,
+    total_slot_count_per_minute,
+    --Generate a timestamp for each minute and truncate the seconds
+    timestamp_trunc(start_time, minute) as minute_timestamp
+  from base_data
+  where run_duration_minutes < 2
+),
+agg_metrics_per_minute as
+(
+  select
+    minute_timestamp as job_run_minute,
+    count(distinct job_id) as job_count_per_minute,
+    sum(total_slot_count_per_minute) as sum_slots_per_minute
+  from
+    combined_detail_data
+  group by 1
+)
+select
+  a.*,
+  r.period_start,
+  r.reservation_name,
+  r.autoscale.current_slots,
+  r.autoscale.max_slots,
+  --Calculate approximate utilization of slots available by the jobs running during that minute
+  case
+    when r.autoscale.current_slots = 0 then 0
+    else (a.sum_slots_per_minute/r.autoscale.current_slots)*100
+  end as utilization_pct
+from agg_metrics_per_minute a
+left join
+.`
Date: Thu, 5 Oct 2023 17:23:42 -0700
Subject: [PATCH 04/52] Tightened join logic between jobs and reservation data

---
 .../bqe_slot_utilization_by_minute.sql        | 36 +++++++++++++------
 1 file changed, 26 insertions(+), 10 deletions(-)

diff --git a/information_schema/bqe_slot_utilization_by_minute.sql b/information_schema/bqe_slot_utilization_by_minute.sql
index e9c644a..54b0db1 100644
--- a/information_schema/bqe_slot_utilization_by_minute.sql
+++ b/information_schema/bqe_slot_utilization_by_minute.sql
@@ -1,7 +1,7 @@
 /*
-This query pulls job data and calculates job metrics to the
-minute grain (i.e. for each minute X number of jobs running and
-Y total approximate slots utilized). This rowset is then joined
+This query pulls job data for a project and calculates job metrics
+to the minute grain (i.e. for each minute X number of jobs running
+and Y total approximate slots utilized). This rowset is then joined
 to reservations timeline data to calculate estimated utilization
 of available slots per minute on a designated BigQuery Editions
 reservation (not on demand).
@@ -12,8 +12,22 @@ declare res_name string;
 set interval_in_days = 7;
 set res_name = '';
 
---original query used against sandbox projects
-with base_data as
+with reservation_data as
+(
+  select
+    period_start,
+    reservation_name,
+    autoscale.current_slots,
+    autoscale.max_slots,
+    reservation_id
+  from
+    --Fill in project_id and region where reservation is set up.
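+    --e.g. (hypothetical values, shown only as an illustration) `my-project`.`region-us`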
+    .``.INFORMATION_SCHEMA.RESERVATIONS_TIMELINE_BY_PROJECT
+  where
+    period_start > timestamp_sub(current_timestamp(), INTERVAL interval_in_days DAY)
+    and reservation_name = res_name
+),
+base_jobs_data as
 (
   SELECT
     job_id,
     start_time,
     end_time,
     --Calculate the job run time in minutes (rounded up)
     CEIL(TIMESTAMP_DIFF(end_time,start_time,MILLISECOND)/1000/60) AS run_duration_minutes,
     total_slot_ms,
     total_bytes_processed,
     total_bytes_billed,
     --Calculate total approximate slot count for the job
     ROUND(SAFE_DIVIDE(total_slot_ms, TIMESTAMP_DIFF(end_time, start_time, MILLISECOND)), 2) AS total_slot_count,
     --Calculate slot milliseconds per minute of job run time
     total_slot_ms/CEIL(TIMESTAMP_DIFF(end_time,start_time,MILLISECOND)/1000/60) AS slot_ms_per_minute,
     --Calculate approximate slot count per minute of job run time
     ROUND(SAFE_DIVIDE((total_slot_ms/CEIL(TIMESTAMP_DIFF(end_time,start_time,MILLISECOND)/1000/60)), TIMESTAMP_DIFF(end_time, start_time, MILLISECOND)), 2) AS total_slot_count_per_minute,
   FROM
+    --Fill in project_id and region where jobs are being executed from
     .``.INFORMATION_SCHEMA.JOBS
   WHERE
     total_slot_ms IS NOT NULL
     and creation_time > timestamp_sub(current_timestamp(), INTERVAL interval_in_days DAY)
+    and (statement_type != "SCRIPT" OR statement_type IS NULL)
+    --Limit jobs data only to ones that ran in the designated reservation
+    and reservation_id in (select distinct reservation_id from reservation_data)
 ),
 --Isolate multiple minute jobs for the next step
 multiple_min_data as
 (
-  select * from base_data where run_duration_minutes > 1
+  select * from base_jobs_data where run_duration_minutes > 1
 ),
 --For jobs that ran more than 1 minute, generate a duplicate row for each minute
 generate_single_minute_data as
@@ -75,7 +93,7 @@ combined_detail_data as
     total_slot_count_per_minute,
     --Generate a timestamp for each minute and truncate the seconds
     timestamp_trunc(start_time, minute) as minute_timestamp
-  from base_data
+  from base_jobs_data
   where run_duration_minutes < 2
 ),
 agg_metrics_per_minute as
 (
   select
@@ -101,8 +119,6 @@ select
   end as utilization_pct
 from agg_metrics_per_minute a
 left join
-.`
Date: Fri, 6 Oct 2023 16:25:37 -0700
Subject: [PATCH 05/52] Replaced JOBS view with JOBS_TIMELINE_BY_ORGANIZATION

---
 .../bqe_slot_utilization_by_minute.sql        | 129 +++++++-----------
 1 file changed, 49 insertions(+), 80 deletions(-)

diff --git a/information_schema/bqe_slot_utilization_by_minute.sql b/information_schema/bqe_slot_utilization_by_minute.sql
index 54b0db1..a498e47 100644
--- a/information_schema/bqe_slot_utilization_by_minute.sql
+++ b/information_schema/bqe_slot_utilization_by_minute.sql
@@ -2,9 +2,9 @@
 This query pulls job data for a project and calculates job metrics
 to the minute grain (i.e. for each minute X number of jobs running
 and Y total approximate slots utilized). This rowset is then joined
-to reservations timeline data to calculate estimated utilization
+to reservations timeline data to calculate estimated utilization 
 of available slots per minute on a designated BigQuery Editions
-reservation (not on demand).
+reservation (not on demand) by a project.
 */
 
 declare interval_in_days int64;
 declare res_name string;
 set interval_in_days = 7;
 set res_name = '';
 
 with reservation_data as
 (
   select
     period_start,
     reservation_name,
     autoscale.current_slots,
     autoscale.max_slots,
     reservation_id
   from
     --Fill in project_id and region where reservation is set up.
-    .``.INFORMATION_SCHEMA.RESERVATIONS_TIMELINE_BY_PROJECT
+    .``.INFORMATION_SCHEMA.RESERVATIONS_TIMELINE
   where
     period_start > timestamp_sub(current_timestamp(), INTERVAL interval_in_days DAY)
     and reservation_name = res_name
 ),
 base_jobs_data as
 (
+  --Pull all jobs data in the org sliced by the second
   SELECT
-    job_id,
-    start_time,
-    end_time,
-    --Calculate the job run time in minutes (rounded up)
-    CEIL(TIMESTAMP_DIFF(end_time,start_time,MILLISECOND)/1000/60) AS run_duration_minutes,
-    total_slot_ms,
-    total_bytes_processed,
-    total_bytes_billed,
-    --Calculate total approximate slot count for the job
-    ROUND(SAFE_DIVIDE(total_slot_ms, TIMESTAMP_DIFF(end_time, start_time, MILLISECOND)), 2) AS total_slot_count,
-    --Calculate slot milliseconds per minute of job run time
-    total_slot_ms/CEIL(TIMESTAMP_DIFF(end_time,start_time,MILLISECOND)/1000/60) AS slot_ms_per_minute,
-    --Calculate approximate slot count per minute of job run time
-    ROUND(SAFE_DIVIDE((total_slot_ms/CEIL(TIMESTAMP_DIFF(end_time,start_time,MILLISECOND)/1000/60)), TIMESTAMP_DIFF(end_time, start_time, MILLISECOND)), 2) AS total_slot_count_per_minute,
+    --Truncate period to the minute
+    timestamp_trunc(a.period_start,minute) as period_start,
+    a.period_slot_ms,
+    a.job_id,
+    a.job_start_time,
+    a.job_end_time,
+    a.reservation_id
   FROM
-    --Fill in project_id and region where jobs are being executed from
-    .``.INFORMATION_SCHEMA.JOBS
+    --Fill in project_id and region where jobs are being submitted to
+    .``.INFORMATION_SCHEMA.JOBS_TIMELINE_BY_ORGANIZATION a
+    join
+    --Limit jobs data only to ones that ran in the designated reservation
+    (select distinct reservation_id from reservation_data) b
+    on a.reservation_id = b.reservation_id
   WHERE
-    total_slot_ms IS NOT NULL
-    and creation_time > timestamp_sub(current_timestamp(), INTERVAL interval_in_days DAY)
+    period_slot_ms IS NOT NULL
+    and job_creation_time > timestamp_sub(current_timestamp(), INTERVAL interval_in_days DAY)
+    --Avoid duplication caused by script jobs due to parent/child thread scenarios
     and (statement_type != "SCRIPT" OR statement_type IS NULL)
 ),
---Isolate multiple minute jobs for the next step
-multiple_min_data as
-(
-  select * from base_jobs_data where run_duration_minutes > 1
-),
---For jobs that ran more than 1 minute, generate a duplicate row for each minute
-generate_single_minute_data as
-(
-  select
-    job_id,
-    start_time,
-    end_time,
-    slot_ms_per_minute,
-    total_slot_count_per_minute,
-    --Generate a timestamp for each minute and truncate the seconds
-    timestamp_trunc(minute_start, minute) as minute_timestamp
-  from multiple_min_data
-  cross join unnest(generate_timestamp_array(start_time, end_time, interval 1 minute)) as minute_start
-),
---Combine all single minute rows
-combined_detail_data as
+jobs_transform1 as
 (
-  select
-    job_id,
-    start_time,
-    end_time,
-    slot_ms_per_minute,
-    total_slot_count_per_minute,
-    minute_timestamp
-  from generate_single_minute_data
-  union all
-  select
+  --Aggregate each job's periods to the minute
+  select
+    period_start,
     job_id,
-    start_time,
-    end_time,
-    slot_ms_per_minute,
-    total_slot_count_per_minute,
-    --Generate a timestamp for each minute and truncate the seconds
-    timestamp_trunc(start_time, minute) as minute_timestamp
+    reservation_id,
+    sum(period_slot_ms) as period_slot_ms_per_minute
   from base_jobs_data
-  where run_duration_minutes < 2
+  group by 1,2,3
 ),
-agg_metrics_per_minute as
+jobs_transform2 as
 (
+  --Convert slot_ms to slots in each job's period
   select
-    minute_timestamp as job_run_minute,
-    count(distinct job_id) as job_count_per_minute,
-    sum(total_slot_count_per_minute) as sum_slots_per_minute
+    period_start,
+    reservation_id,
+    count(distinct job_id) as active_jobs_per_period,
+    round(safe_divide(cast(sum(period_slot_ms_per_minute) as float64),60000),2) as slots_per_period
   from
-    combined_detail_data
-  group by 1
+    jobs_transform1
+  group by 1,2
 )
-select
-  a.*,
-  r.period_start,
-  r.reservation_name,
-  r.autoscale.current_slots,
-  r.autoscale.max_slots,
-  --Calculate approximate utilization of slots available by the jobs running during that minute
+--Join reservation periods with job periods for final recordset.
+select
+  r.*,
+  coalesce(j.active_jobs_per_period,0) as active_jobs_per_period,
+  coalesce(j.slots_per_period,0) as slots_per_period,
   case
-    when r.autoscale.current_slots = 0 then 0
-    else (a.sum_slots_per_minute/r.autoscale.current_slots)*100
+    when r.current_slots = 0 then 0
+    else (j.slots_per_period/r.current_slots)*100
   end as utilization_pct
+from
 reservation_data r
-on a.job_run_minute = r.period_start
-order by job_run_minute desc;
\ No newline at end of file
+left join
+jobs_transform2 j
+on r.period_start = j.period_start
+and r.reservation_id = j.reservation_id
+order by r.period_start desc
+;

From 0f9406b2b02c83531cd095dcce32cc340aaa3d35 Mon Sep 17 00:00:00 2001
From: Glen Park
Date: Wed, 11 Oct 2023 15:20:38 -0700
Subject: [PATCH 06/52] Added baseline slots as a column in final recordset

---
 .../bqe_slot_utilization_by_minute.sql        | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/information_schema/bqe_slot_utilization_by_minute.sql b/information_schema/bqe_slot_utilization_by_minute.sql
index a498e47..396675c 100644
--- a/information_schema/bqe_slot_utilization_by_minute.sql
+++ b/information_schema/bqe_slot_utilization_by_minute.sql
@@ -5,6 +5,17 @@ and Y total approximate slots utilized). This rowset is then joined
 to reservations timeline data to calculate estimated utilization
 of available slots per minute on a designated BigQuery Editions
 reservation (not on demand) by a project.
+
+Result set schema:
+  - period_start: Start time of one minute period for the reservation
+  - reservation_name: Name of the reservation
+  - slots_assigned: The number of slots assigned to this reservation (baseline slots)
+  - current_slots: The number of slots added to the reservation by autoscaling
+  - max_slots: The maximum number of slots that can be added to the reservation by autoscaling
+  - reservation_id: ID of the reservation in the format of "project_id:location.reservation_name"
+  - active_jobs_per_period: Number of jobs running or ran within a period
+  - slots_per_period: Approximate number of slots utilized within a period
+  - utilization_pct: Approximate utilization percentage of current slots within a period
 */
 
 declare interval_in_days int64;
@@ -17,6 +28,7 @@ with reservation_data as
   select
     period_start,
     reservation_name,
+    slots_assigned,
     autoscale.current_slots,
     autoscale.max_slots,
     reservation_id

From 0d45d0d60f5a42e65c21975f87aebfc49bd6b4cf Mon Sep 17 00:00:00 2001
From: Glen Park
Date: Tue, 31 Oct 2023 12:39:09 -0700
Subject: [PATCH 07/52] Slight adjustment to logical pricing

---
 information_schema/billing_model_cost_estimation.sql | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/information_schema/billing_model_cost_estimation.sql b/information_schema/billing_model_cost_estimation.sql
index dbe87c2..c382a5e 100644
--- a/information_schema/billing_model_cost_estimation.sql
+++ b/information_schema/billing_model_cost_estimation.sql
@@ -7,8 +7,8 @@ out when it comes to considering which provides the most savings.
 All numbers are grouped by dataset and considered estimates only.
 */
 
-DECLARE active_logical_gb_price FLOAT64 DEFAULT 0.02;
-DECLARE long_term_logical_gb_price FLOAT64 DEFAULT 0.01;
+DECLARE active_logical_gb_price FLOAT64 DEFAULT 0.023;
+DECLARE long_term_logical_gb_price FLOAT64 DEFAULT 0.016;
 DECLARE active_physical_gb_price FLOAT64 DEFAULT 0.04;
 DECLARE long_term_physical_gb_price FLOAT64 DEFAULT 0.02;

From f4566074c4f7a12f48a4293b16a0f29382ffc3a2 Mon Sep 17 00:00:00 2001
From: Glen Park
Date: Fri, 8 Dec 2023 16:42:54 -0800
Subject: [PATCH 08/52] New query to identify number of queued jobs by second

---
 .../queued_queries_by_second.sql              | 34 +++++++++++++++++++
 1 file changed, 34 insertions(+)
 create mode 100644 information_schema/queued_queries_by_second.sql

diff --git a/information_schema/queued_queries_by_second.sql b/information_schema/queued_queries_by_second.sql
new file mode 100644
index 0000000..b11848c
--- /dev/null
+++ b/information_schema/queued_queries_by_second.sql
@@ -0,0 +1,34 @@
+/*
+This query will show the number of queued jobs broken down by seconds, regardless
+of job type.
+
+Nested query can be used separately to identify job_ids when a hotspot timeframe has been
+identified.
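+
+As a hypothetical usage example (editorial illustration, timestamps are made up):
+once a busy window is spotted in the results, the nested query below can be rerun
+on its own with an extra filter such as
+  and period_start between '2023-12-01 10:00:00' and '2023-12-01 10:05:00'
+to list the affected job_ids.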
+*/ + +-- Change this value to change how far in the past the query will search +DECLARE interval_in_days INT64 DEFAULT 1; + +with base_data as +( + select + period_start, + job_id, + state, + error_result.reason, + error_result.location, + error_result.message + from + `wpro-kraken-314320`.`region-us-west2`.INFORMATION_SCHEMA.JOBS_TIMELINE_BY_PROJECT + where + job_creation_time BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY) + AND CURRENT_TIMESTAMP() + AND state = 'PENDING' +) +select + period_start, + count(distinct job_id) as queued_jobs_cnt +from + base_data +group by 1 +order by 1; \ No newline at end of file From a5593752afb1044c85263db4e5fcd5769e84941d Mon Sep 17 00:00:00 2001 From: Sayle Matthews Date: Wed, 13 Dec 2023 13:02:25 -0600 Subject: [PATCH 09/52] Updated the query to utilize some nuances of the autoscaler to give a better estimation. --- .../billing_recommendation_per_query.sql | 57 +++++++++++++------ 1 file changed, 40 insertions(+), 17 deletions(-) diff --git a/information_schema/billing_recommendation_per_query.sql b/information_schema/billing_recommendation_per_query.sql index 5d9d8f8..8d2c721 100644 --- a/information_schema/billing_recommendation_per_query.sql +++ b/information_schema/billing_recommendation_per_query.sql @@ -1,6 +1,13 @@ -- This query will look at every query ran over the specified timeframe and determine if it is better to be run -- under an on-demand or an Editions pricing billing model. In the final resultset, it will recommend on-demand --- or an Edition (or an Edition with a commit period) +-- or an Edition (or an Edition with a commit period) for each Edition type and commit period + +/* + * This query does some real-world estimations of actual costs of Editions when using the autoscaler. + * It gets to this by utilizing a few nuances of the autoscaler and its behavior, + * namely that each query is billed for a minimum of 1 minute and + * slots are billed in increments of 100 (rounds up to nearest 100). 
+*/ -- Change this value to change how far in the past the query will search DECLARE interval_in_days INT64 DEFAULT 14; @@ -26,7 +33,7 @@ WITH 0) / POW(1024, 4), 2) AS totalTerabytesBilled, TIMESTAMP_DIFF(end_time, start_time, MILLISECOND) AS executionTimeMs FROM - ``.``.INFORMATION_SCHEMA.JOBS_BY_PROJECT + `region-US`.INFORMATION_SCHEMA.JOBS_BY_PROJECT WHERE creation_time BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY) AND CURRENT_TIMESTAMP() @@ -34,6 +41,19 @@ WITH AND state = "DONE" ORDER BY approximateSlotCount DESC ), + rounded AS ( + SELECT + *, + -- Rounds up to the nearest 100 slots + floor((CEIL(approximateSlotCount) + 99) / 100) * 100 AS roundedUpSlots, + + -- If query ran in under 1 minute (60 seconds * 1000 ms) then round up to 1 minute + IF(executionTimeMs < 1000*60, 1000*60, executionTimeMs) AS billedDurationMs, + -- Calculates the duration in hours for calculating slot/hours used + -- Formula: (Execution Time in ms)/(1000 ms * 60 seconds * 60 minutes) + IF(executionTimeMs < 1000*60, 1000*60, executionTimeMs)/(1000*60*60) AS billedDurationHour + FROM src + ), costs AS ( SELECT *, @@ -41,15 +61,16 @@ WITH POW(1024, 4)) * 5, 2) AS legacyOnDemandCost, ROUND(SAFE_DIVIDE(totalBytesBilled, POW(1024, 4)) * 6.25, 2) AS onDemandCost, - (approximateSlotCount/(60*60)) * 0.04 AS standardEditionCost, - (approximateSlotCount/(60*60)) * 0.06 AS enterpriseEditionCost, - (approximateSlotCount/(60*60)) * 0.048 AS enterpriseEdition1YearCost, - (approximateSlotCount/(60*60)) * 0.036 AS enterpriseEdition3YearCost, - (approximateSlotCount/(60*60)) * 0.1 AS enterprisePlusEditionCost, - (approximateSlotCount/(60*60)) * 0.08 AS enterprisePlusEdition1YearCost, - (approximateSlotCount/(60*60)) * 0.06 AS enterprisePlusEdition3YearCost, + + billedDurationHour * 0.04 AS standardEditionCost, + billedDurationHour * 0.06 AS enterpriseEditionCost, + billedDurationHour * 0.048 AS enterpriseEdition1YearCost, + billedDurationHour * 0.036 AS enterpriseEdition3YearCost, + billedDurationHour * 0.1 AS enterprisePlusEditionCost, + billedDurationHour * 0.08 AS enterprisePlusEdition1YearCost, + billedDurationHour * 0.06 AS enterprisePlusEdition3YearCost FROM - src + rounded ), queryCounts AS ( SELECT @@ -65,6 +86,7 @@ WITH projectId, startTime, endTime, + totalBytesBilled, totalMegabytesBilled, totalGigabytesBilled, @@ -73,6 +95,7 @@ WITH legacyOnDemandCost, onDemandCost, + standardEditionCost, enterpriseEditionCost, enterpriseEdition1YearCost, @@ -97,16 +120,16 @@ WITH SELECT query, - IF(standardEditionComparison > 0, 'On-demand', 'Standard Edition') AS standardEditionRecommendation, - IF(enterpriseEditionComparison > 0, 'On-demand', 'Enterprise Edition') AS enterpriseEditionRecommendation, - IF(enterpriseEdition1YearComparison > 0, 'On-demand', 'Enterprise Edition 1 Year Commit') AS + IF(standardEditionComparison < 0, 'On-demand', 'Standard Edition') AS standardEditionRecommendation, + IF(enterpriseEditionComparison < 0, 'On-demand', 'Enterprise Edition') AS enterpriseEditionRecommendation, + IF(enterpriseEdition1YearComparison < 0, 'On-demand', 'Enterprise Edition 1 Year Commit') AS enterpriseEdition1YearRecommendation, - IF(enterpriseEdition3YearComparison > 0, 'On-demand', 'Enterprise Edition 3 Year Commit') AS enterpriseEdition3YearRecommendation, + IF(enterpriseEdition3YearComparison < 0, 'On-demand', 'Enterprise Edition 3 Year Commit') AS enterpriseEdition3YearRecommendation, - IF(enterpriseEditionComparison > 0, 'On-demand', 'Enterprise Plus Edition') AS 
  enterprisePlusEditionRecommendation,
-  IF(enterpriseEdition1YearComparison > 0, 'On-demand', 'Enterprise Edition 1 Year Commit') AS
+  IF(enterpriseEdition3YearComparison < 0, 'On-demand', 'Enterprise Edition 3 Year Commit') AS enterpriseEdition3YearRecommendation,
+  IF(enterpriseEditionComparison < 0, 'On-demand', 'Enterprise Plus Edition') AS enterprisePlusEditionRecommendation,
+  IF(enterpriseEdition1YearComparison < 0, 'On-demand', 'Enterprise Edition 1 Year Commit') AS
   enterpriseEditionPlus1YearRecommendation,
-  IF(enterprisePlusEdition3YearComparison > 0, 'On-demand', 'Enterprise Plus Edition 3 Year Commit') AS enterprisePlusEdition3YearRecommendation,
+  IF(enterprisePlusEdition3YearComparison < 0, 'On-demand', 'Enterprise Plus Edition 3 Year Commit') AS enterprisePlusEdition3YearRecommendation,
 
   startTime,
   endTime,

From 2c5399ff5476eea99160e6c8088d440859f122e4 Mon Sep 17 00:00:00 2001
From: Sayle Matthews
Date: Wed, 13 Dec 2023 15:41:30 -0600
Subject: [PATCH 10/52] Updated the project and regions to the template value

---
 information_schema/billing_recommendation_per_query.sql | 6 +++++-
 information_schema/queued_queries_by_second.sql         | 3 ++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/information_schema/billing_recommendation_per_query.sql b/information_schema/billing_recommendation_per_query.sql
index 8d2c721..80a627d 100644
--- a/information_schema/billing_recommendation_per_query.sql
+++ b/information_schema/billing_recommendation_per_query.sql
@@ -33,7 +33,7 @@ WITH
         0) / POW(1024, 4), 2) AS totalTerabytesBilled,
       TIMESTAMP_DIFF(end_time, start_time, MILLISECOND) AS executionTimeMs
     FROM
-      `region-US`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
+      ``.``.INFORMATION_SCHEMA.JOBS_BY_PROJECT
     WHERE
       creation_time BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY)
         AND CURRENT_TIMESTAMP()
@@ -86,12 +86,14 @@ WITH
       projectId,
       startTime,
       endTime,
+      billedDurationHour AS editionsBilledDurationInHours,
 
       totalBytesBilled,
       totalMegabytesBilled,
       totalGigabytesBilled,
       totalTerabytesBilled,
       approximateSlotCount,
+      roundedUpSlots AS billedSlotCount,
 
       legacyOnDemandCost,
       onDemandCost,
@@ -133,7 +135,9 @@
 
   startTime,
   endTime,
+  editionsBilledDurationInHours,
   approximateSlotCount,
+  billedSlotCount,
 
   legacyOnDemandCost,
   onDemandCost,
diff --git a/information_schema/queued_queries_by_second.sql b/information_schema/queued_queries_by_second.sql
index b11848c..b0a1d15 100644
--- a/information_schema/queued_queries_by_second.sql
+++ b/information_schema/queued_queries_by_second.sql
@@ -19,12 +19,13 @@ with base_data as
     error_result.location,
     error_result.message
   from
-    `wpro-kraken-314320`.`region-us-west2`.INFORMATION_SCHEMA.JOBS_TIMELINE_BY_PROJECT
+    ``.``.INFORMATION_SCHEMA.JOBS_TIMELINE_BY_PROJECT
   where
     job_creation_time BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY)
       AND CURRENT_TIMESTAMP()
     AND state = 'PENDING'
 )
+
 select
   period_start,
   count(distinct job_id) as queued_jobs_cnt

From 2f56f03da81e89e81020dedf3b85db85254d810d Mon Sep 17 00:00:00 2001
From: Sayle Matthews
Date: Tue, 16 Jan 2024 13:25:51 -0600
Subject: [PATCH 11/52] Updated to remove some rounding with on-demand queries
 that might filter out very small billed queries. This isn't an issue 99% of
 the time, but if a customer runs 1mil queries per month it might cause a
 significant impact to the cost forecasting.
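
As an illustrative arithmetic aside (hypothetical numbers, not from the original
message): a query billing 10 MB on-demand costs about $0.00006 at $6.25/TiB;
ROUND(cost, 2) reports that as $0.00, so one million such queries would hide
roughly $60 of forecast cost.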
---
 information_schema/billing_recommendation_per_query.sql | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/information_schema/billing_recommendation_per_query.sql b/information_schema/billing_recommendation_per_query.sql
index 80a627d..3019b5d 100644
--- a/information_schema/billing_recommendation_per_query.sql
+++ b/information_schema/billing_recommendation_per_query.sql
@@ -57,10 +57,10 @@ WITH
   costs AS (
     SELECT
       *,
-      ROUND(SAFE_DIVIDE(totalBytesBilled,
-        POW(1024, 4)) * 5, 2) AS legacyOnDemandCost,
-      ROUND(SAFE_DIVIDE(totalBytesBilled,
-        POW(1024, 4)) * 6.25, 2) AS onDemandCost,
+      SAFE_DIVIDE(totalBytesBilled,
+        POW(1024, 4)) * 5 AS legacyOnDemandCost,
+      SAFE_DIVIDE(totalBytesBilled,
+        POW(1024, 4)) * 6.25 AS onDemandCost,
 
       billedDurationHour * 0.04 AS standardEditionCost,
       billedDurationHour * 0.06 AS enterpriseEditionCost,

From 465f325c1016772dae515b4d0860b5d1fe273a6b Mon Sep 17 00:00:00 2001
From: Glen Park
Date: Tue, 2 Apr 2024 10:45:05 -0700
Subject: [PATCH 12/52] adjust rate from 5 to 6.25

---
 information_schema/top_costly_queries.sql | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/information_schema/top_costly_queries.sql b/information_schema/top_costly_queries.sql
index a190658..939bba4 100644
--- a/information_schema/top_costly_queries.sql
+++ b/information_schema/top_costly_queries.sql
@@ -55,7 +55,7 @@ SELECT
   ROUND(COALESCE(totalBytesBilled,
       0) / POW(1024, 4), 2) AS totalTerabytesBilled,
   ROUND(SAFE_DIVIDE(totalBytesBilled,
-      POW(1024, 4)) * 5, 2) AS onDemandCost
+      POW(1024, 4)) * 6.25, 2) AS onDemandCost
 FROM
   hashedQueries
 ORDER BY

From aabfff61c086ac327d9ff5d63899047e1d0f66a0 Mon Sep 17 00:00:00 2001
From: Sayle Matthews <61764166+sayle-doit@users.noreply.github.com>
Date: Tue, 16 Apr 2024 09:13:06 -0500
Subject: [PATCH 13/52] Update top_complex_queries.sql

Updated on-demand cost from $5 to $6.25 to reflect the new pricing.
---
 information_schema/top_complex_queries.sql | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/information_schema/top_complex_queries.sql b/information_schema/top_complex_queries.sql
index 7568d9c..e2fddad 100644
--- a/information_schema/top_complex_queries.sql
+++ b/information_schema/top_complex_queries.sql
@@ -16,7 +16,7 @@ BEGIN
       start_time AS startTime,
       end_time AS endTime,
       total_bytes_billed AS totalBytesBilled,
-      ROUND(SAFE_DIVIDE(total_bytes_billed, POW(1024, 4)) * 5, 2) AS onDemandCost,
+      ROUND(SAFE_DIVIDE(total_bytes_billed, POW(1024, 4)) * 6.25, 2) AS onDemandCost,
       TIMESTAMP_DIFF(end_time, start_time, SECOND) AS executionTime,
       TIMESTAMP_DIFF(end_time, start_time, MILLISECOND) AS executionTimeMs,
       ROW_NUMBER() OVER (PARTITION BY job_id ORDER BY end_time DESC) AS _rnk
@@ -42,4 +42,4 @@ FROM
   jobsDeduplicated
 ORDER BY
   approximateSlotCount DESC;
-END
\ No newline at end of file
+END

From 137002221b60da082122e488b75cad4c05133882 Mon Sep 17 00:00:00 2001
From: Sayle Matthews <61764166+sayle-doit@users.noreply.github.com>
Date: Tue, 16 Apr 2024 09:14:12 -0500
Subject: [PATCH 14/52] Update top_billed_queries_deduplicated.sql

Updated on-demand cost to $6.25.
---
 information_schema/top_billed_queries_deduplicated.sql | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/information_schema/top_billed_queries_deduplicated.sql b/information_schema/top_billed_queries_deduplicated.sql
index faba08c..89c9a6e 100644
--- a/information_schema/top_billed_queries_deduplicated.sql
+++ b/information_schema/top_billed_queries_deduplicated.sql
@@ -24,7 +24,7 @@ BEGIN
       ROUND(COALESCE(total_bytes_billed,
           0) / POW(1024, 4), 2) AS totalTerabytesBilled,
       ROUND(SAFE_DIVIDE(total_bytes_billed,
-          POW(1024, 4)) * 5, 2) AS cost,
+          POW(1024, 4)) * 6.25, 2) AS on_demand_cost,
       ROUND(SAFE_DIVIDE(total_slot_ms,
           TIMESTAMP_DIFF(end_time, start_time, MILLISECOND)), 2) AS approximateSlotCount,
       ROW_NUMBER() OVER(PARTITION BY job_id ORDER BY end_time DESC) AS _rnk,
@@ -59,4 +59,4 @@ FROM
   queriesDeduplicated
 ORDER BY
   cost DESC;
-END
\ No newline at end of file
+END

From 594bc7547c8cb6d59d8d6fe228a5f4d9af5caeb1 Mon Sep 17 00:00:00 2001
From: Sayle Matthews <61764166+sayle-doit@users.noreply.github.com>
Date: Tue, 16 Apr 2024 09:16:54 -0500
Subject: [PATCH 15/52] Update top_billed_queries.sql

Updated on-demand cost to $6.25.
---
 information_schema/top_billed_queries.sql | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/information_schema/top_billed_queries.sql b/information_schema/top_billed_queries.sql
index 9df7d0b..adc5216 100644
--- a/information_schema/top_billed_queries.sql
+++ b/information_schema/top_billed_queries.sql
@@ -22,7 +22,7 @@ BEGIN
       ROUND(COALESCE(total_bytes_billed,
           0) / POW(1024, 4), 2) AS totalTerabytesBilled,
       ROUND(SAFE_DIVIDE(total_bytes_billed,
-          POW(1024, 4)) * 5, 2) AS cost,
+          POW(1024, 4)) * 6.25, 2) AS on_demand_cost,
       ROUND(SAFE_DIVIDE(total_slot_ms,
          TIMESTAMP_DIFF(end_time, start_time, MILLISECOND)), 2) AS approximateSlotCount,
       ROW_NUMBER() OVER(PARTITION BY job_id ORDER BY end_time DESC) AS _rnk
@@ -49,4 +49,4 @@ FROM
   jobsDeduplicated
 ORDER BY
   cost DESC;
-END
\ No newline at end of file
+END

From eb75360401b6d6116b21ccbb0232fb4b5e847c8f Mon Sep 17 00:00:00 2001
From: jkdabbert <90283100+jkdabbert@users.noreply.github.com>
Date: Wed, 17 Apr 2024 12:00:24 -0600
Subject: [PATCH 16/52] Update billing_recommendation_per_query.sql

Adding projectid to the final output
---
 information_schema/billing_recommendation_per_query.sql | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/information_schema/billing_recommendation_per_query.sql b/information_schema/billing_recommendation_per_query.sql
index 3019b5d..580a55a 100644
--- a/information_schema/billing_recommendation_per_query.sql
+++ b/information_schema/billing_recommendation_per_query.sql
@@ -121,6 +121,7 @@ WITH
       costs.query = queryCounts.query
   )
 SELECT
+  projectid,
   query,
   IF(standardEditionComparison < 0, 'On-demand', 'Standard Edition') AS standardEditionRecommendation,
   IF(enterpriseEditionComparison < 0, 'On-demand', 'Enterprise Edition') AS enterpriseEditionRecommendation,
@@ -158,4 +159,4 @@ FROM
 ORDER BY
   onDemandCost DESC,
   approximateSlotCount DESC;
-END
\ No newline at end of file
+END

From 40d9133c457f622f6f34e0f4ec45b1ea14683b90 Mon Sep 17 00:00:00 2001
From: Sayle Matthews <61764166+sayle-doit@users.noreply.github.com>
Date: Wed, 17 Apr 2024 16:12:54 -0500
Subject: [PATCH 17/52] Update general_job_information.sql

Updated on-demand cost.
---
 information_schema/general_job_information.sql | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/information_schema/general_job_information.sql b/information_schema/general_job_information.sql
index 0582d57..94927c6 100644
--- a/information_schema/general_job_information.sql
+++ b/information_schema/general_job_information.sql
@@ -19,7 +19,7 @@ BEGIN
       ROUND(COALESCE(total_bytes_billed,
           0) / POW(1024, 4), 2) AS totalTerabytesBilled,
       total_slot_ms AS totalSlotMs,
       ROUND(SAFE_DIVIDE(total_bytes_billed,
-          POW(1024, 4)) * 5, 2) AS onDemandCost,
+          POW(1024, 4)) * 6.25, 2) AS onDemandCost,
       ROUND(SAFE_DIVIDE(total_slot_ms,
           TIMESTAMP_DIFF(end_time, start_time, MILLISECOND)), 2) AS approximateSlotCount,
       ROW_NUMBER() OVER (PARTITION BY job_id ORDER BY end_time DESC) AS _rnk
@@ -44,4 +44,4 @@ SELECT
   *
 FROM
   jobsDeduplicated;
-END
\ No newline at end of file
+END

From bcf4672157d9b270ad16663e76198c47424751c5 Mon Sep 17 00:00:00 2001
From: Sayle Matthews <61764166+sayle-doit@users.noreply.github.com>
Date: Fri, 26 Apr 2024 15:07:52 -0500
Subject: [PATCH 18/52] Update top_billed_queries.sql

Updated names to reflect the correct units.
---
 information_schema/top_billed_queries.sql | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/information_schema/top_billed_queries.sql b/information_schema/top_billed_queries.sql
index adc5216..651e39a 100644
--- a/information_schema/top_billed_queries.sql
+++ b/information_schema/top_billed_queries.sql
@@ -16,11 +16,11 @@ BEGIN
       ROUND(COALESCE(total_bytes_billed,
           0), 2) AS totalBytesBilled,
       ROUND(COALESCE(total_bytes_billed,
-          0) / POW(1024, 2), 2) AS totalMegabytesBilled,
+          0) / POW(1024, 2), 2) AS totalMebibytesBilled,
       ROUND(COALESCE(total_bytes_billed,
-          0) / POW(1024, 3), 2) AS totalGigabytesBilled,
+          0) / POW(1024, 3), 2) AS totalGibibytesBilled,
       ROUND(COALESCE(total_bytes_billed,
-          0) / POW(1024, 4), 2) AS totalTerabytesBilled,
+          0) / POW(1024, 4), 2) AS totalTebibytesBilled,
       ROUND(SAFE_DIVIDE(total_bytes_billed,
           POW(1024, 4)) * 6.25, 2) AS on_demand_cost,
       ROUND(SAFE_DIVIDE(total_slot_ms,

From 7e452ee64b67d89e99fd1d128fd1b3c8b4caa2da Mon Sep 17 00:00:00 2001
From: Sayle Matthews <61764166+sayle-doit@users.noreply.github.com>
Date: Mon, 29 Apr 2024 14:53:43 -0500
Subject: [PATCH 19/52] Update top_cost_users.sql

Updated to reflect new on-demand pricing.
---
 information_schema/top_cost_users.sql | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/information_schema/top_cost_users.sql b/information_schema/top_cost_users.sql
index 561d4fc..51cbd34 100644
--- a/information_schema/top_cost_users.sql
+++ b/information_schema/top_cost_users.sql
@@ -23,7 +23,7 @@ BEGIN
   jobsDeduplicated AS (
   SELECT
     user,
-    ROUND(SAFE_DIVIDE(SUM(src.totalBytesBilled), POW(1024, 4)) * 5, 2) AS onDemandCost
+    ROUND(SAFE_DIVIDE(SUM(src.totalBytesBilled), POW(1024, 4)) * 6.25, 2) AS onDemandCost
   FROM
     src
   WHERE
@@ -38,4 +38,4 @@ FROM
 ORDER BY
   user DESC,
   onDemandCost DESC;
-END
\ No newline at end of file
+END

From 4049184dd3f8f34370a72b899fa8014202f9dd54 Mon Sep 17 00:00:00 2001
From: Sayle Matthews <61764166+sayle-doit@users.noreply.github.com>
Date: Mon, 29 Apr 2024 14:54:33 -0500
Subject: [PATCH 20/52] Update query_job_information.sql

Updated on-demand cost.
---
 information_schema/query_job_information.sql | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/information_schema/query_job_information.sql b/information_schema/query_job_information.sql
index 6ed26a4..217bd46 100644
--- a/information_schema/query_job_information.sql
+++ b/information_schema/query_job_information.sql
@@ -16,7 +16,7 @@ WITH referenced_tables AS
       referencedTables,
       labels,
       ROUND(SAFE_DIVIDE(total_bytes_billed,
-          POW(1024, 4)) * 5, 2) AS onDemandCost,
+          POW(1024, 4)) * 6.25, 2) AS onDemandCost,
       ROUND(COALESCE(total_bytes_billed,
           0), 2) AS totalBytesBilled,
       ROUND(COALESCE(total_bytes_billed,
@@ -51,4 +51,4 @@ SELECT
   *
 FROM
   jobsDeduplicated;
-END
\ No newline at end of file
+END

From 4eb02b9c40106b4d3c0b0c501ecc2cb5501010e9 Mon Sep 17 00:00:00 2001
From: Sayle Matthews <61764166+sayle-doit@users.noreply.github.com>
Date: Mon, 29 Apr 2024 14:55:02 -0500
Subject: [PATCH 21/52] Update top_billed_labels.sql

Updated with new on-demand cost.
---
 information_schema/top_billed_labels.sql | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/information_schema/top_billed_labels.sql b/information_schema/top_billed_labels.sql
index 255f589..d3aa72e 100644
--- a/information_schema/top_billed_labels.sql
+++ b/information_schema/top_billed_labels.sql
@@ -41,7 +41,7 @@ BEGIN
     ROUND(SUM(COALESCE(totalBilledBytes, 0)) / POW(1024, 4), 2) AS totalTerabytesBilled,
     ROUND(SAFE_DIVIDE(SUM(COALESCE(totalBilledBytes, 0)),
-        POW(1024, 4)) * 5, 2) AS onDemandCost,
+        POW(1024, 4)) * 6.25, 2) AS onDemandCost,
     labelKey,
     labelValue
   FROM jobsDeduplicated
   GROUP BY
     labelKey,
     labelValue
 ORDER BY
   onDemandCost DESC;
-END
\ No newline at end of file
+END

From b225de73c6b7837f30e723468770eab9f12e696b Mon Sep 17 00:00:00 2001
From: Sayle Matthews
Date: Mon, 29 Apr 2024 15:55:48 -0500
Subject: [PATCH 22/52] Added additional SQL queries for determining physical
 storage pricing suggestions. One does at the dataset level and the other at
 the table level.

---
 ...er_table_storage_model_recommendations.sql | 189 ++++++++++++++++++
 .../project_storage_model_recommendation.sql  | 173 ++++++++++++++++
 2 files changed, 362 insertions(+)
 create mode 100644 information_schema/project_per_table_storage_model_recommendations.sql
 create mode 100644 information_schema/project_storage_model_recommendation.sql

diff --git a/information_schema/project_per_table_storage_model_recommendations.sql b/information_schema/project_per_table_storage_model_recommendations.sql
new file mode 100644
index 0000000..d66195d
--- /dev/null
+++ b/information_schema/project_per_table_storage_model_recommendations.sql
@@ -0,0 +1,189 @@
+/*
+ * This query will look at a single project (by default in US multi-region) and
+ * calculate the logical and physical billing prices for each table in all datasets
+ * contained inside of it then provide a recommendation on if it is better to put it
+ * into a dataset that uses the logical (uncompressed) or physical (compressed)
+ * storage billing models.
+ *
+ * Physical (also called compressed) Storage went GA on July 5, 2023. It is set at
+ * the dataset level, so if two or more tables are better suited for different
+ * billing models in the same dataset it may be best to split them into separate
+ * datasets to get the best cost benefit.
+ *
+ * Note that there are not any performance penalties for using physical storage as
+ * the underlying data is always compressed and operations operate on that compressed
+ * data already. This is strictly a billing change.
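+ *
+ * A rough rule of thumb implied by the prices declared below (an editorial,
+ * hypothetical example, ignoring time travel and fail-safe): physical storage
+ * is priced at 2x logical per GiB, so a table starts to save money once its
+ * compression ratio (logical bytes / physical bytes) exceeds about 2.0. For
+ * instance, 100 GiB logical compressing to 20 GiB costs $2.00/month active
+ * logical vs $0.80/month active physical at the US rates below.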
+ * + * One thing to note is that there isn't a 100% effective way to determine if an + * existing table is logical or physical storage (it only works on newly created + * tables, but this is a known issue the BQ team is working on). Once this has + * been fixed this query will be updated to show the current model. + * + * It also includes inside of the storage CTE lots of extra values that can be used + * for other calculations that are being left in here to assist you as the customer + * make the best decision or to see additional information about your tables/datasets. + * + * Note it targets the US multi-region by default. If needing to change the region + * then change `region-us` below to whichever region the data exists in. Also uncomment + * the DECLARE values below for the EU region or if you are using a non-multi-region + * then refer here: https://cloud.google.com/bigquery/pricing#storage + * for the correct pricing and update accordingly. + */ + +-- These values are for the US multi-region +-- Comment these out and uncomment below if using the EU multi-region +DECLARE active_logical_price_per_gb NUMERIC DEFAULT 0.02; +DECLARE long_term_logical_price_per_gb NUMERIC DEFAULT 0.01; +DECLARE active_physical_price_per_gb NUMERIC DEFAULT 0.04; +DECLARE long_term_physical_price_per_gb NUMERIC DEFAULT 0.02; + +-- These values are for the EU multi-region +-- Uncomment these and comment out the above if using the EU multi-region +/* +DECLARE active_logical_price_per_gb NUMERIC DEFAULT 0.02; +DECLARE long_term_logical_price_per_gb NUMERIC DEFAULT 0.01; +DECLARE active_physical_price_per_gb NUMERIC DEFAULT 0.044; +DECLARE long_term_physical_price_per_gb NUMERIC DEFAULT 0.022; +*/ + +WITH storage AS +( + SELECT DISTINCT + tb.table_name, + tb.table_schema AS dataset, + total_rows, + total_partitions, + + -- Uncompressed bytes + total_logical_bytes AS total_uncompressed_bytes, + total_logical_bytes/POW(1024, 3) AS total_logical_gibytes, + total_logical_bytes/POW(1024, 4) AS total_logical_tibytes, + active_logical_bytes AS active_uncompressed_bytes, + active_logical_bytes/POW(1024, 3) AS active_uncompressed_gibytes, + active_logical_bytes/POW(1024, 4) AS active_uncompressed_tibytes, + long_term_logical_bytes AS long_term_uncompressed_bytes, + long_term_logical_bytes/POW(1024, 3) AS long_term_uncompressed_gibytes, + long_term_logical_bytes/POW(1024, 4) AS long_term_uncompressed_tibytes, + + -- Compressed bytes + total_physical_bytes AS total_compressed_bytes, + total_physical_bytes/POW(1024, 3) AS total_compressed_gibytes, + total_physical_bytes/POW(1024, 4) AS total_compressed_tibytes, + -- Note that active physical bytes includes time travel so need to remove that + active_physical_bytes-time_travel_physical_bytes AS active_compressed_bytes, + (active_physical_bytes-time_travel_physical_bytes)/POW(1024, 3) AS active_compressed_gibytes, + (active_physical_bytes-time_travel_physical_bytes)/POW(1024, 4) AS active_compressed_tibytes, + long_term_physical_bytes AS long_term_compressed_bytes, + long_term_physical_bytes/POW(1024, 3) AS long_term_compressed_gibytes, + long_term_physical_bytes/POW(1024, 4) AS long_term_compressed_tibytes, + time_travel_physical_bytes AS time_travel_compressed_bytes, + time_travel_physical_bytes/POW(1024, 3) AS time_travel_compressed_gibytes, + time_travel_physical_bytes/POW(1024, 4) AS time_travel_compressed_tibytes, + fail_safe_physical_bytes AS fail_safe_physical_bytes, + fail_safe_physical_bytes/POW(1024, 3) AS fail_safe_compressed_gibytes, + 
+    fail_safe_physical_bytes/POW(1024, 4) AS fail_safe_compressed_tibytes,
+
+    -- Compression ratios
+    SAFE_DIVIDE(total_logical_bytes, total_physical_bytes) AS total_compression_ratio, -- Defined as uncompressed size/compressed size
+    SAFE_DIVIDE(long_term_logical_bytes, long_term_physical_bytes) AS long_term_compression_ratio,
+    SAFE_DIVIDE(active_logical_bytes, active_physical_bytes) AS active_compression_ratio,
+
+    -- Pricing
+    ((active_logical_bytes/POW(1024, 3))*active_logical_price_per_gb) +
+      ((long_term_logical_bytes/POW(1024, 3))*long_term_logical_price_per_gb) AS total_uncompressed_price,
+    ((active_logical_bytes/POW(1024, 3))*active_logical_price_per_gb) AS active_uncompressed_price,
+    ((long_term_logical_bytes/POW(1024, 3))*long_term_logical_price_per_gb) AS long_term_uncompressed_price,
+    (((active_physical_bytes-time_travel_physical_bytes)/POW(1024, 3))*active_physical_price_per_gb) +
+      ((long_term_physical_bytes/POW(1024, 3))*long_term_physical_price_per_gb) AS total_compressed_price,
+    (((active_physical_bytes-time_travel_physical_bytes)/POW(1024, 3))*active_physical_price_per_gb) AS active_compressed_price,
+    (long_term_physical_bytes/POW(1024, 3))*long_term_physical_price_per_gb AS long_term_compressed_price,
+    (time_travel_physical_bytes/POW(1024, 3))*active_physical_price_per_gb AS time_travel_compressed_price,
+    (fail_safe_physical_bytes/POW(1024, 3))*active_physical_price_per_gb AS fail_safe_compressed_price
+  FROM
+    ``.``.INFORMATION_SCHEMA.TABLE_STORAGE AS tb
+
+    -- Need to join on TABLES for existing tables to remove any temporary or job result tables
+    -- Note due to this information being in the TABLE_STORAGE view this means it cannot be
+    -- performed across an entire organization without checking the TABLES view in each project.
+ JOIN ``.``.INFORMATION_SCHEMA.TABLES AS t + ON t.table_catalog = tb.project_id + AND t.table_name = tb.table_name + WHERE + tb.deleted = false +), +calculations AS +( + SELECT + table_name, + dataset, + SUM(active_uncompressed_price) AS active_uncompressed_price, + SUM(active_compressed_price) AS active_compressed_price, + SUM(long_term_uncompressed_price) AS long_term_uncompressed_price, + SUM(long_term_compressed_price) AS long_term_compressed_price, + SUM(time_travel_compressed_price) AS time_travel_compressed_price, + SUM(fail_safe_compressed_price) AS fail_safe_compressed_price + FROM + storage + GROUP by + table_name, dataset +), +final_data AS +( + SELECT + dataset, + table_name, + + -- Price differences, note that >0 means physical storage is cheaper before adding in time travel and failsafe + active_uncompressed_price-active_compressed_price AS active_price_difference, + long_term_uncompressed_price-long_term_compressed_price AS long_term_price_difference, + + -- Time travel and fail safe storage reductions + (time_travel_compressed_price+fail_safe_compressed_price) AS additional_costs_for_physical_storage, + + -- Totals for each model + active_uncompressed_price+long_term_uncompressed_price AS logical_storage_price, + (active_compressed_price+long_term_compressed_price)+ + (time_travel_compressed_price+fail_safe_compressed_price) AS physical_storage_price, + + -- Difference in values (logical - active) + (active_uncompressed_price+long_term_uncompressed_price) + - + ( + (active_compressed_price+long_term_compressed_price)+ + (time_travel_compressed_price+fail_safe_compressed_price) + ) AS difference + FROM + calculations +) + +SELECT + dataset, + table_name, + + -- Logical prices and base (before adding in time travel and fail-safe reductions) physical price + CONCAT('$ ',FORMAT("%'.2f", active_uncompressed_price)) AS logical_active_price, + CONCAT('$ ',FORMAT("%'.2f", active_compressed_price)) AS base_physical_price, + CONCAT('$ ',FORMAT("%'.2f", long_term_uncompressed_price)) AS logical_long_term_price, + CONCAT('$ ',FORMAT("%'.2f", long_term_compressed_price)) AS base_long_term_price, + + -- Time travel and fail safe storage reductions + CONCAT('$ ',FORMAT("%'.2f", additional_costs_for_physical_storage)) AS additional_costs_for_physical_storage, + + -- Totals for each model + -- Note physical_storage_price is the total price with the time-travel and fail-safe reductions factored in + CONCAT('$ ',FORMAT("%'.2f", logical_storage_price)) AS logical_storage_price, + CONCAT('$ ',FORMAT("%'.2f", physical_storage_price)) AS physical_storage_price, + + -- Difference between logical storage and physical storage (logical - active) + -- Note that a negative value means logica/uncompressed is cheaper + CONCAT('$ ',FORMAT("%'.2f", difference)) AS difference_in_price_if_physical_is_chosen, + + -- Recommendation + IF(logical_storage_price < physical_storage_price, + 'Logical storage', 'Physical storage') AS recommendation, + + -- If you wish to get the raw values that are not formatted uncomment the below line + --final_data.* EXCEPT(dataset) + FROM + final_data +; \ No newline at end of file diff --git a/information_schema/project_storage_model_recommendation.sql b/information_schema/project_storage_model_recommendation.sql new file mode 100644 index 0000000..ebab405 --- /dev/null +++ b/information_schema/project_storage_model_recommendation.sql @@ -0,0 +1,173 @@ +/* + * This query will look at a single project (by default in US multi-region) and + * calculate the logical and physical 
billing prices for each dataset inside of it + * then provide a recommendation on whether to keep it on logical storage + * or switch to the physical billing model. + * + * Physical (also called compressed) Storage will be GA and released for public + * consumption on July 5, 2023. + * + * It also includes inside of the storage CTE lots of extra values that can be used + * for other calculations that are being left in here to assist you as the customer + * make the best decision or to see additional information about your tables/datasets. + * + * Note it targets the US multi-region by default. If needing to change the region + * then change `region-us` below to whichever region the data exists in. Also uncomment + * the DECLARE values below for the EU region or if you are using a non-multi-region + * then refer here: https://cloud.google.com/bigquery/pricing#storage + * for the correct pricing and update accordingly. + */ + +-- These values are for the US multi-region +-- Comment these out and uncomment below if using the EU multi-region +DECLARE active_logical_price_per_gb NUMERIC DEFAULT 0.02; +DECLARE long_term_logical_price_per_gb NUMERIC DEFAULT 0.01; +DECLARE active_physical_price_per_gb NUMERIC DEFAULT 0.04; +DECLARE long_term_physical_price_per_gb NUMERIC DEFAULT 0.02; + +-- These values are for the EU multi-region +-- Comment these out and uncomment above if using the US multi-region +/* +DECLARE active_logical_price_per_gb NUMERIC DEFAULT 0.02; +DECLARE long_term_logical_price_per_gb NUMERIC DEFAULT 0.01; +DECLARE active_physical_price_per_gb NUMERIC DEFAULT 0.044; +DECLARE long_term_physical_price_per_gb NUMERIC DEFAULT 0.022; +*/ + +WITH storage AS +( + SELECT DISTINCT + tb.table_name, + tb.table_schema AS dataset, + total_rows, + total_partitions, + + -- Uncompressed bytes + total_logical_bytes AS total_uncompressed_bytes, + total_logical_bytes/POW(1024, 3) AS total_logical_gibytes, + total_logical_bytes/POW(1024, 4) AS total_logical_tibytes, + active_logical_bytes AS active_uncompressed_bytes, + active_logical_bytes/POW(1024, 3) AS active_uncompressed_gibytes, + active_logical_bytes/POW(1024, 4) AS active_uncompressed_tibytes, + long_term_logical_bytes AS long_term_uncompressed_bytes, + long_term_logical_bytes/POW(1024, 3) AS long_term_uncompressed_gibytes, + long_term_logical_bytes/POW(1024, 4) AS long_term_uncompressed_tibytes, + + -- Compressed bytes + total_physical_bytes AS total_compressed_bytes, + total_physical_bytes/POW(1024, 3) AS total_compressed_gibytes, + total_physical_bytes/POW(1024, 4) AS total_compressed_tibytes, + -- Note that active physical bytes includes time travel so need to remove that + active_physical_bytes-time_travel_physical_bytes AS active_compressed_bytes, + (active_physical_bytes-time_travel_physical_bytes)/POW(1024, 3) AS active_compressed_gibytes, + (active_physical_bytes-time_travel_physical_bytes)/POW(1024, 4) AS active_compressed_tibytes, + long_term_physical_bytes AS long_term_compressed_bytes, + long_term_physical_bytes/POW(1024, 3) AS long_term_compressed_gibytes, + long_term_physical_bytes/POW(1024, 4) AS long_term_compressed_tibytes, + time_travel_physical_bytes AS time_travel_compressed_bytes, + time_travel_physical_bytes/POW(1024, 3) AS time_travel_compressed_gibytes, + time_travel_physical_bytes/POW(1024, 4) AS time_travel_compressed_tibytes, + fail_safe_physical_bytes AS fail_safe_physical_bytes, + fail_safe_physical_bytes/POW(1024, 3) AS fail_safe_compressed_gibytes, + fail_safe_physical_bytes/POW(1024, 4) AS 
fail_safe_compressed_tibytes, + + -- Compression ratios + SAFE_DIVIDE(total_logical_bytes, total_physical_bytes) AS total_compression_ratio, -- Defined as uncompressed size/compressed size + SAFE_DIVIDE(long_term_logical_bytes, long_term_physical_bytes) AS long_term_compression_ratio, + SAFE_DIVIDE(active_logical_bytes, active_physical_bytes) AS active_compression_ratio, + + -- Pricing + ((active_logical_bytes/POW(1024, 3))*active_logical_price_per_gb) + + ((long_term_logical_bytes/POW(1024, 3))*long_term_logical_price_per_gb) AS total_uncompressed_price, + ((active_logical_bytes/POW(1024, 3))*active_logical_price_per_gb) AS active_uncompressed_price, + ((long_term_logical_bytes/POW(1024, 3))*long_term_logical_price_per_gb) AS long_term_uncompressed_price, + (((active_physical_bytes-time_travel_physical_bytes)/POW(1024, 3))*active_physical_price_per_gb) + + ((long_term_physical_bytes/POW(1024, 3))*long_term_physical_price_per_gb) AS total_compressed_price, + (((active_physical_bytes-time_travel_physical_bytes)/POW(1024, 3))*active_physical_price_per_gb) AS active_compressed_price, + (long_term_physical_bytes/POW(1024, 3))*long_term_physical_price_per_gb AS long_term_compressed_price, + (time_travel_physical_bytes/POW(1024, 3))*active_physical_price_per_gb AS time_travel_compressed_price, + (fail_safe_physical_bytes/POW(1024, 3))*active_physical_price_per_gb AS fail_safe_compressed_price + FROM + ``.``.INFORMATION_SCHEMA.TABLE_STORAGE AS tb + + -- Need to join on TABLES for existing tables to remove any temporary or job result tables + -- Note due to this information being in the TABLE_STORAGE view this means it cannot be + -- performed across an entire organization without checking the TABLES view in each project. + JOIN ``.``.INFORMATION_SCHEMA.TABLES AS t + ON t.table_catalog = tb.project_id + AND t.table_name = tb.table_name + WHERE + tb.deleted = false +), +calculations AS +( + SELECT + dataset, + SUM(active_uncompressed_price) AS active_uncompressed_price, + SUM(active_compressed_price) AS active_compressed_price, + SUM(long_term_uncompressed_price) AS long_term_uncompressed_price, + SUM(long_term_compressed_price) AS long_term_compressed_price, + SUM(time_travel_compressed_price) AS time_travel_compressed_price, + SUM(fail_safe_compressed_price) AS fail_safe_compressed_price + FROM + storage + GROUP BY + dataset +), +final_data AS +( + SELECT + dataset, + + -- Price differences, note that >0 means physical storage is cheaper before adding in time travel and failsafe + active_uncompressed_price-active_compressed_price AS active_price_difference, + long_term_uncompressed_price-long_term_compressed_price AS long_term_price_difference, + + -- Time travel and fail safe storage reductions + (time_travel_compressed_price+fail_safe_compressed_price) AS additional_costs_for_physical_storage, + + -- Totals for each model + active_uncompressed_price+long_term_uncompressed_price AS logical_storage_price, + (active_compressed_price+long_term_compressed_price)+ + (time_travel_compressed_price+fail_safe_compressed_price) AS physical_storage_price, + + -- Difference in values (logical - active) + (active_uncompressed_price+long_term_uncompressed_price) - + ( + (active_compressed_price+long_term_compressed_price)+ + (time_travel_compressed_price+fail_safe_compressed_price) + ) AS difference + FROM + calculations +) + +SELECT + dataset, + + -- Logical prices and base (before adding in time travel and fail-safe reductions) physical price + CONCAT('$ ',FORMAT("%'.2f", active_uncompressed_price)) AS 
logical_active_price, + CONCAT('$ ',FORMAT("%'.2f", active_compressed_price)) AS base_physical_price, + CONCAT('$ ',FORMAT("%'.2f", long_term_uncompressed_price)) AS logical_long_term_price, + CONCAT('$ ',FORMAT("%'.2f", long_term_compressed_price)) AS base_long_term_price, + + -- Time travel and fail safe storage reductions + CONCAT('$ ',FORMAT("%'.2f", additional_costs_for_physical_storage)) AS additional_costs_for_physical_storage, + + -- Totals for each model + -- Note physical_storage_price is the total price with the time-travel and fail-safe reductions factored in + CONCAT('$ ',FORMAT("%'.2f", logical_storage_price)) AS logical_storage_price, + CONCAT('$ ',FORMAT("%'.2f", physical_storage_price)) AS physical_storage_price, + + -- Difference between logical storage and physical storage (logical - active) + -- Note that a negative value means logica/uncompressed is cheaper + CONCAT('$ ',FORMAT("%'.2f", difference)) AS difference_in_price_if_physical_is_chosen, + + -- Recommendation + IF(logical_storage_price < physical_storage_price, + 'Keep dataset on logical storage', 'Change dataset to physical storage') AS recommendation, + + -- If you wish to get the raw values that are not formatted uncomment the below line + --final_data.* EXCEPT(dataset) + FROM + final_data +; \ No newline at end of file From 154271db4b8923fe680b70ab82f9b852559950cc Mon Sep 17 00:00:00 2001 From: Sayle Matthews <61764166+sayle-doit@users.noreply.github.com> Date: Wed, 22 May 2024 14:32:32 -0500 Subject: [PATCH 23/52] Update project_storage_model_recommendation.sql Fixed a bug where some existing fields weren't output to final query. --- .../project_storage_model_recommendation.sql | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/information_schema/project_storage_model_recommendation.sql b/information_schema/project_storage_model_recommendation.sql index ebab405..f76e24b 100644 --- a/information_schema/project_storage_model_recommendation.sql +++ b/information_schema/project_storage_model_recommendation.sql @@ -136,7 +136,12 @@ final_data AS ( (active_compressed_price+long_term_compressed_price)+ (time_travel_compressed_price+fail_safe_compressed_price) - ) AS difference + ) AS difference, + + active_uncompressed_price, + active_compressed_price, + long_term_uncompressed_price, + long_term_compressed_price FROM calculations ) @@ -170,4 +175,4 @@ SELECT --final_data.* EXCEPT(dataset) FROM final_data -; \ No newline at end of file +; From 1ed00a701667bdf2e1a154fbb6a24a67dc8f2cdc Mon Sep 17 00:00:00 2001 From: Sayle Matthews Date: Mon, 17 Jun 2024 16:00:00 -0500 Subject: [PATCH 24/52] Updated the slots_by_* queries to utilize the timeline_jobs view since this has reached GA status and is more precise than just the jobs view. 
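For orientation, every query rewritten in this patch follows the same JOBS_TIMELINE pattern: sum period_slot_ms for each timeline period, then divide by the number of milliseconds in that period to get an average slot count. A minimal standalone sketch of the per-minute case (the `region-us` qualifier and the 7-day window are illustrative placeholders, not values taken from this patch):

-- Approximate average slots consumed per minute over the last 7 days
SELECT
  TIMESTAMP_TRUNC(period_start, MINUTE) AS minute,
  SAFE_DIVIDE(SUM(period_slot_ms), 1000 * 60) AS avg_slot_usage
FROM
  `region-us`.INFORMATION_SCHEMA.JOBS_TIMELINE
WHERE
  period_start > TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 7 DAY)
GROUP BY
  minute
ORDER BY
  minute;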
--- information_schema/load_slots_per_minute.sql | 73 ++++++++---------- information_schema/load_slots_per_second.sql | 74 ++++++++---------- information_schema/query_slots_per_minute.sql | 73 ++++++++---------- .../query_slots_per_minute_and_user.sql | 50 ++++++++++++ information_schema/query_slots_per_second.sql | 76 ++++++++----------- information_schema/slots_by_day.sql | 29 +++---- information_schema/slots_by_hour.sql | 29 +++---- information_schema/slots_by_minute.sql | 29 +++---- .../slots_by_minute_and_user.sql | 54 ++++++------- information_schema/slots_by_second.sql | 25 +++--- 10 files changed, 257 insertions(+), 255 deletions(-) create mode 100644 information_schema/query_slots_per_minute_and_user.sql diff --git a/information_schema/load_slots_per_minute.sql b/information_schema/load_slots_per_minute.sql index 87de6a1..291d068 100644 --- a/information_schema/load_slots_per_minute.sql +++ b/information_schema/load_slots_per_minute.sql @@ -1,59 +1,46 @@ --- This query will show the slot usage of all load jobs for each second in the specified timeframe. +-- This query will show the slot usage of all load jobs for each minute in the specified timeframe. -- Change this value to change how far in the past the query will search DECLARE interval_in_days INT64 DEFAULT 7; +DECLARE time_period INT64 DEFAULT (1000 * 60); -- Number of milliseconds in a minute + BEGIN WITH src AS ( SELECT - user_email AS user, - job_id AS jobId, - query, - DATETIME_TRUNC(start_time, - MINUTE) AS startTime, - DATETIME_TRUNC(end_time, - MINUTE) AS endTime, - SAFE_DIVIDE(total_slot_ms, - TIMESTAMP_DIFF(end_time, start_time, MILLISECOND)) AS slotCount, - TIMESTAMP_DIFF(end_time, start_time, MINUTE) AS diff, - ROW_NUMBER() OVER (PARTITION BY job_id ORDER BY end_time DESC) AS _rnk - FROM - ``.``.INFORMATION_SCHEMA.JOBS_BY_PROJECT - WHERE - creation_time BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY) - AND CURRENT_TIMESTAMP() - AND total_slot_ms IS NOT NULL - AND job_type = 'LOAD'), - jobsDeduplicated AS ( - SELECT - * EXCEPT(_rnk) + SAFE_DIVIDE(SUM(period_slot_ms), time_period) AS slot_usage, -- Divide by 1 minute (1000 ms * 60 seconds) to convert to slots/minute + TIMESTAMP_TRUNC(period_start, MINUTE) as period_start FROM - src + ``.``.INFORMATION_SCHEMA.JOBS_TIMELINE WHERE - _rnk = 1 ), - differences AS ( - SELECT - *, - generate_timestamp_array(startTime, - endTime, - INTERVAL 1 MINUTE) AS int - FROM - jobsDeduplicated ), - byMinutes AS ( + period_start BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY) AND CURRENT_TIMESTAMP() + AND job_type = 'MINUTE' + GROUP BY + period_start + ORDER BY + period_start DESC + ), + time_series AS( + SELECT + * + FROM + UNNEST(generate_timestamp_array(DATETIME_TRUNC(TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY), MINUTE), + DATETIME_TRUNC(CURRENT_TIMESTAMP(), MINUTE), + INTERVAL 1 MINUTE)) AS timeInterval + ), + joined AS ( SELECT - * EXCEPT(int) + COALESCE(src.slot_usage, 0) as slot_usage, + timeInterval FROM - differences, - UNNEST(int) AS minute ) + src RIGHT OUTER JOIN time_series + ON period_start = timeInterval + ) SELECT - slotCount, - user, - jobId, - query, - minute + * FROM - byMinutes + joined ORDER BY - minute ASC; + timeInterval ASC; END \ No newline at end of file diff --git a/information_schema/load_slots_per_second.sql b/information_schema/load_slots_per_second.sql index 1b3b845..b5fec56 100644 --- a/information_schema/load_slots_per_second.sql +++ b/information_schema/load_slots_per_second.sql 
@@ -3,58 +3,44 @@ -- Change this value to change how far in the past the query will search DECLARE interval_in_days INT64 DEFAULT 7; +DECLARE time_period INT64 DEFAULT (1000); -- Number of milliseconds in a second + BEGIN -WITH - src AS ( - SELECT - user_email AS user, - job_id AS jobId, - query, - DATETIME_TRUNC(start_time, - SECOND) AS startTime, - DATETIME_TRUNC(end_time, - SECOND) AS endTime, - SAFE_DIVIDE(total_slot_ms, - TIMESTAMP_DIFF(end_time, start_time, MILLISECOND)) AS slotCount, - TIMESTAMP_DIFF(end_time, start_time, MINUTE) AS diff, - ROW_NUMBER() OVER (PARTITION BY job_id ORDER BY end_time DESC) AS _rnk - FROM - ``.``.INFORMATION_SCHEMA.JOBS_BY_PROJECT - WHERE - creation_time BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY) - AND CURRENT_TIMESTAMP() - AND total_slot_ms IS NOT NULL - AND job_type = 'QUERY'), - jobsDeduplicated AS ( + WITH src AS ( SELECT - * EXCEPT(_rnk) + SAFE_DIVIDE(SUM(period_slot_ms), time_period) AS slotUsage, -- Divide by 1 second (1000 ms) to convert to slots/second + period_start FROM - src + ``.``.INFORMATION_SCHEMA.JOBS_TIMELINE WHERE - _rnk = 1 ), - differences AS ( - SELECT - *, - generate_timestamp_array(startTime, - endTime, - INTERVAL 1 SECOND) AS int - FROM - jobsDeduplicated ), - bySeconds AS ( + period_start BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY) AND CURRENT_TIMESTAMP() + AND job_type = 'LOAD' + GROUP BY + period_start + ORDER BY + period_start DESC + ), + timeSeries AS( + SELECT + * + FROM + UNNEST(generate_timestamp_array(DATETIME_TRUNC(TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY), SECOND), + DATETIME_TRUNC(CURRENT_TIMESTAMP(), SECOND), + INTERVAL 1 SECOND)) AS timeInterval + ), + joined AS ( SELECT - * EXCEPT(int) + COALESCE(src.slotUsage, 0) as slotUsage, + timeInterval FROM - differences, - UNNEST(int) AS second ) + src RIGHT OUTER JOIN timeSeries + ON period_start = timeInterval + ) SELECT - slotCount, - user, - jobId, - query, - second + * FROM - bySeconds + joined ORDER BY - second ASC; + timeInterval ASC; END \ No newline at end of file diff --git a/information_schema/query_slots_per_minute.sql b/information_schema/query_slots_per_minute.sql index 8421001..69322ca 100644 --- a/information_schema/query_slots_per_minute.sql +++ b/information_schema/query_slots_per_minute.sql @@ -1,59 +1,46 @@ --- This query will show the query slot usage of all queries for each second in the specified timeframe. +-- This query will show the aggregate average slot usage of all query jobs for each minute in the specified timeframe. 
-- Change this value to change how far in the past the query will search DECLARE interval_in_days INT64 DEFAULT 7; +DECLARE time_period INT64 DEFAULT (1000 * 60); -- Number of milliseconds in a minute + BEGIN WITH src AS ( SELECT - user_email AS user, - job_id AS jobId, - query, - DATETIME_TRUNC(start_time, - MINUTE) AS startTime, - DATETIME_TRUNC(end_time, - MINUTE) AS endTime, - SAFE_DIVIDE(total_slot_ms, - TIMESTAMP_DIFF(end_time, start_time, MILLISECOND)) AS slotCount, - TIMESTAMP_DIFF(end_time, start_time, MINUTE) AS diff, - ROW_NUMBER() OVER (PARTITION BY job_id ORDER BY end_time DESC) AS _rnk - FROM - ``.``.INFORMATION_SCHEMA.JOBS_BY_PROJECT - WHERE - creation_time BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY) - AND CURRENT_TIMESTAMP() - AND total_slot_ms IS NOT NULL - AND job_type = 'QUERY'), - jobsDeduplicated AS ( - SELECT - * EXCEPT(_rnk) + SAFE_DIVIDE(SUM(period_slot_ms), time_period) AS slot_usage, -- Divide by 1 minute (1000 ms * 60 seconds) to convert to slots/minute + TIMESTAMP_TRUNC(period_start, MINUTE) as period_start FROM - src + ``.``.INFORMATION_SCHEMA.JOBS_TIMELINE WHERE - _rnk = 1 ), - differences AS ( - SELECT - *, - generate_timestamp_array(startTime, - endTime, - INTERVAL 1 MINUTE) AS int - FROM - jobsDeduplicated ), - byMinutes AS ( + period_start BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY) AND CURRENT_TIMESTAMP() + AND job_type = 'QUERY' + GROUP BY + period_start + ORDER BY + period_start DESC + ), + time_series AS( + SELECT + * + FROM + UNNEST(generate_timestamp_array(DATETIME_TRUNC(TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY), MINUTE), + DATETIME_TRUNC(CURRENT_TIMESTAMP(), MINUTE), + INTERVAL 1 MINUTE)) AS timeInterval + ), + joined AS ( SELECT - * EXCEPT(int) + COALESCE(src.slot_usage, 0) as slot_usage, + timeInterval FROM - differences, - UNNEST(int) AS minute ) + src RIGHT OUTER JOIN time_series + ON period_start = timeInterval + ) SELECT - slotCount, - user, - jobId, - query, - minute + * FROM - byMinutes + joined ORDER BY - minute ASC; + timeInterval ASC; END \ No newline at end of file diff --git a/information_schema/query_slots_per_minute_and_user.sql b/information_schema/query_slots_per_minute_and_user.sql new file mode 100644 index 0000000..59a4798 --- /dev/null +++ b/information_schema/query_slots_per_minute_and_user.sql @@ -0,0 +1,50 @@ +-- This query will show the aggregate average slot usage of all jobs for each minute in the specified timeframe and also group by user +-- queries over that period. 
+ +-- Change this value to change how far in the past the query will search +DECLARE interval_in_days INT64 DEFAULT 7; + +DECLARE time_period INT64 DEFAULT (1000 * 60); -- Number of milliseconds in a minute + +BEGIN + WITH src AS ( + SELECT + SAFE_DIVIDE(SUM(period_slot_ms), time_period) AS slot_usage, -- Divide by 1 minute (1000 ms * 60 seconds) to convert to slots/minute + user_email, + TIMESTAMP_TRUNC(period_start, MINUTE) as period_start + FROM + ``.``.INFORMATION_SCHEMA.JOBS_TIMELINE + WHERE + period_start BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY) AND CURRENT_TIMESTAMP() + AND job_type = 'QUERY' + GROUP BY + period_start, + user_email + ORDER BY + period_start DESC + ), + time_series AS( + SELECT + * + FROM + UNNEST(generate_timestamp_array(DATETIME_TRUNC(TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY), MINUTE), + DATETIME_TRUNC(CURRENT_TIMESTAMP(), MINUTE), + INTERVAL 1 MINUTE)) AS timeInterval + ), + joined AS ( + SELECT + COALESCE(src.slot_usage, 0) as slot_usage, + user_email, + timeInterval + FROM + src RIGHT OUTER JOIN time_series + ON period_start = timeInterval + ) + +SELECT + * +FROM + joined +ORDER BY + timeInterval ASC; +END \ No newline at end of file diff --git a/information_schema/query_slots_per_second.sql b/information_schema/query_slots_per_second.sql index 2adf19b..96621ad 100644 --- a/information_schema/query_slots_per_second.sql +++ b/information_schema/query_slots_per_second.sql @@ -1,60 +1,46 @@ --- This query will show the query slot usage of all queries for each second in the specified timeframe. +-- This query will show the aggregate average slot usage of all query jobs for each second in the specified timeframe. -- Change this value to change how far in the past the query will search DECLARE interval_in_days INT64 DEFAULT 7; +DECLARE time_period INT64 DEFAULT (1000); -- Number of milliseconds in a second + BEGIN -WITH - src AS ( - SELECT - user_email AS user, - job_id AS jobId, - query, - DATETIME_TRUNC(start_time, - SECOND) AS startTime, - DATETIME_TRUNC(end_time, - SECOND) AS endTime, - SAFE_DIVIDE(total_slot_ms, - TIMESTAMP_DIFF(end_time, start_time, MILLISECOND)) AS slotCount, - TIMESTAMP_DIFF(end_time, start_time, MINUTE) AS diff, - ROW_NUMBER() OVER (PARTITION BY job_id ORDER BY end_time DESC) AS _rnk - FROM - ``.``.INFORMATION_SCHEMA.JOBS_BY_PROJECT - WHERE - creation_time BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY) - AND CURRENT_TIMESTAMP() - AND total_slot_ms IS NOT NULL - AND job_type = 'QUERY'), - jobsDeduplicated AS ( + WITH src AS ( SELECT - * EXCEPT(_rnk) + SAFE_DIVIDE(SUM(period_slot_ms), time_period) AS slotUsage, -- Divide by 1 second (1000 ms) to convert to slots/second + period_start FROM - src + ``.``.INFORMATION_SCHEMA.JOBS_TIMELINE WHERE - _rnk = 1 ), - differences AS ( - SELECT - *, - generate_timestamp_array(startTime, - endTime, - INTERVAL 1 SECOND) AS int - FROM - jobsDeduplicated ), - bySeconds AS ( + period_start BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY) AND CURRENT_TIMESTAMP() + AND job_type = 'QUERY' + GROUP BY + period_start + ORDER BY + period_start DESC + ), + timeSeries AS( + SELECT + * + FROM + UNNEST(generate_timestamp_array(DATETIME_TRUNC(TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY), SECOND), + DATETIME_TRUNC(CURRENT_TIMESTAMP(), SECOND), + INTERVAL 1 SECOND)) AS timeInterval + ), + joined AS ( SELECT - * EXCEPT(int) + COALESCE(src.slotUsage, 0) as slotUsage, + timeInterval FROM - 
differences,
-      UNNEST(int) AS second )
+    src RIGHT OUTER JOIN timeSeries
+    ON period_start = timeInterval
+  )
 
 SELECT
-  slotCount,
-  user,
-  jobId,
-  query,
-  second
+  *
 FROM
-  bySeconds
+  joined
 ORDER BY
-  second ASC;
+  timeInterval ASC;
 END
\ No newline at end of file
diff --git a/information_schema/slots_by_day.sql b/information_schema/slots_by_day.sql
index 78d719e..46d9a36 100644
--- a/information_schema/slots_by_day.sql
+++ b/information_schema/slots_by_day.sql
@@ -3,24 +3,24 @@
 -- Change this value to change how far in the past the query will search
 DECLARE interval_in_days INT64 DEFAULT 7;
 
-DECLARE time_period INT64;
-SET time_period = (1000*60*60*24); -- Number of milliseconds in a day
+DECLARE time_period INT64 DEFAULT (1000*60*60*24); -- Number of milliseconds in a day
 
 BEGIN
   WITH src AS (
     SELECT
-      SAFE_DIVIDE(SUM(total_slot_ms), time_period) AS slotUsage,
-      DATETIME_TRUNC(creation_time,
-        DAY) AS creationTime
+      SAFE_DIVIDE(SUM(period_slot_ms), time_period) AS slot_usage, -- Divide by 1 day (1000 ms * 60 seconds * 60 minutes * 24 hours) to convert to slots/day
+      TIMESTAMP_TRUNC(period_start, DAY) as period_start
     FROM
-      ``.``.INFORMATION_SCHEMA.JOBS_BY_PROJECT
+      ``.``.INFORMATION_SCHEMA.JOBS_TIMELINE
     WHERE
-      creation_time BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY)
-      AND CURRENT_TIMESTAMP()
-      AND total_slot_ms IS NOT NULL
+      period_start BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY) AND CURRENT_TIMESTAMP()
+      AND job_type <> 'SCRIPT' -- Exclude scripts since they pull in child process slots and skew results
     GROUP BY
-      creationTime),
-    timeSeries AS(
+      period_start
+    ORDER BY
+      period_start DESC
+  ),
+  time_series AS(
     SELECT
       *
     FROM
@@ -30,11 +30,12 @@
     ),
     joined AS (
     SELECT
-      COALESCE(src.slotUsage, 0) as slotUsage,
+      COALESCE(src.slot_usage, 0) as slot_usage,
       timeInterval
     FROM
-      src RIGHT OUTER JOIN timeSeries
-      ON creationTime = timeInterval)
+      src RIGHT OUTER JOIN time_series
+      ON period_start = timeInterval
+  )
 
 SELECT
   *
diff --git a/information_schema/slots_by_hour.sql b/information_schema/slots_by_hour.sql
index 25b643e..907606a 100644
--- a/information_schema/slots_by_hour.sql
+++ b/information_schema/slots_by_hour.sql
@@ -3,24 +3,24 @@
 -- Change this value to change how far in the past the query will search
 DECLARE interval_in_days INT64 DEFAULT 7;
 
-DECLARE time_period INT64;
-SET time_period = (1000*60*60); -- Number of milliseconds in a hour
+DECLARE time_period INT64 DEFAULT (1000*60*60); -- Number of milliseconds in an hour
 
 BEGIN
   WITH src AS (
     SELECT
-      SAFE_DIVIDE(SUM(total_slot_ms), time_period) AS slotUsage,
-      DATETIME_TRUNC(creation_time,
-        HOUR) AS creationTime
+      SAFE_DIVIDE(SUM(period_slot_ms), time_period) AS slot_usage, -- Divide by 1 hour (1000 ms * 60 seconds * 60 minutes) to convert to slots/hour
+      TIMESTAMP_TRUNC(period_start, HOUR) as period_start
     FROM
-      ``.``.INFORMATION_SCHEMA.JOBS_BY_PROJECT
+      ``.``.INFORMATION_SCHEMA.JOBS_TIMELINE
     WHERE
-      creation_time BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY)
-      AND CURRENT_TIMESTAMP()
-      AND total_slot_ms IS NOT NULL
+      period_start BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY) AND CURRENT_TIMESTAMP()
+      AND job_type <> 'SCRIPT' -- Exclude scripts since they pull in child process slots and skew results
     GROUP BY
-      creationTime),
-    timeSeries AS(
+      period_start
+    ORDER BY
+      period_start DESC
+  ),
+  time_series AS(
     SELECT
       *
     FROM
@@ -30,11 +30,12 @@
     ),
     joined AS (
     SELECT
-      COALESCE(src.slotUsage, 0) as slotUsage,
+      COALESCE(src.slot_usage, 0) as slot_usage,
       timeInterval
     FROM
-      src RIGHT OUTER JOIN timeSeries
-      ON creationTime = timeInterval)
+      src RIGHT OUTER JOIN time_series
+      ON period_start = timeInterval
+  )
 
 SELECT
   *
diff --git a/information_schema/slots_by_minute.sql b/information_schema/slots_by_minute.sql
index 7a75239..afb34e8 100644
--- a/information_schema/slots_by_minute.sql
+++ b/information_schema/slots_by_minute.sql
@@ -3,24 +3,24 @@
 -- Change this value to change how far in the past the query will search
 DECLARE interval_in_days INT64 DEFAULT 7;
 
-DECLARE time_period INT64;
-SET time_period = (1000*60); -- Number of milliseconds in a minute
+DECLARE time_period INT64 DEFAULT (1000 * 60); -- Number of milliseconds in a minute
 
 BEGIN
   WITH src AS (
     SELECT
-      SAFE_DIVIDE(SUM(total_slot_ms), time_period) AS slotUsage,
-      DATETIME_TRUNC(creation_time,
-        MINUTE) AS creationTime
+      SAFE_DIVIDE(SUM(period_slot_ms), time_period) AS slot_usage, -- Divide by 1 minute (1000 ms * 60 seconds) to convert to slots/minute
+      TIMESTAMP_TRUNC(period_start, MINUTE) as period_start
     FROM
-      ``.``.INFORMATION_SCHEMA.JOBS_BY_PROJECT
+      ``.``.INFORMATION_SCHEMA.JOBS_TIMELINE
     WHERE
-      creation_time BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY)
-      AND CURRENT_TIMESTAMP()
-      AND total_slot_ms IS NOT NULL
+      period_start BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY) AND CURRENT_TIMESTAMP()
+      AND job_type <> 'SCRIPT' -- Exclude scripts since they pull in child process slots and skew results
     GROUP BY
-      creationTime),
-    timeSeries AS(
+      period_start
+    ORDER BY
+      period_start DESC
+  ),
+  time_series AS(
     SELECT
       *
     FROM
@@ -30,11 +30,12 @@
     ),
     joined AS (
     SELECT
-      COALESCE(src.slotUsage, 0) as slotUsage,
+      COALESCE(src.slot_usage, 0) as slot_usage,
       timeInterval
     FROM
-      src RIGHT OUTER JOIN timeSeries
-      ON creationTime = timeInterval)
+      src RIGHT OUTER JOIN time_series
+      ON period_start = timeInterval
+  )
 
 SELECT
   *
diff --git a/information_schema/slots_by_minute_and_user.sql b/information_schema/slots_by_minute_and_user.sql
index 2b0ca02..7722358 100644
--- a/information_schema/slots_by_minute_and_user.sql
+++ b/information_schema/slots_by_minute_and_user.sql
@@ -1,46 +1,48 @@
--- This query will show the slot usage of all jobs for each minute in the interval for each user
+-- This query will show the aggregate average slot usage of all jobs for each minute in the specified timeframe, grouped by the user
+-- that ran the queries over that period.
-- Change this value to change how far in the past the query will search DECLARE interval_in_days INT64 DEFAULT 7; -DECLARE time_period INT64; -SET time_period = (1000*60); -- Number of milliseconds in a minute +DECLARE time_period INT64 DEFAULT (1000 * 60); -- Number of milliseconds in a minute BEGIN WITH src AS ( SELECT - user_email AS user, - SAFE_DIVIDE(SUM(total_slot_ms), time_period) AS slotUsage, - DATETIME_TRUNC(creation_time, MINUTE) AS creationTime + SAFE_DIVIDE(SUM(period_slot_ms), time_period) AS slot_usage, -- Divide by 1 minute (1000 ms * 60 seconds) to convert to slots/minute + user_email, + TIMESTAMP_TRUNC(period_start, MINUTE) as period_start FROM - ``.``.INFORMATION_SCHEMA.JOBS_BY_PROJECT + ``.``.INFORMATION_SCHEMA.JOBS_TIMELINE WHERE - creation_time BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY) - AND CURRENT_TIMESTAMP() - AND total_slot_ms IS NOT NULL + period_start BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY) AND CURRENT_TIMESTAMP() + AND job_type <> 'SCRIPT' -- Exclude scripts since they pull in child process slots and skew results GROUP BY - user_email, creationTime), - timeSeries AS( - SELECT - * - FROM - UNNEST(generate_timestamp_array(DATETIME_TRUNC(TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY), MINUTE), - DATETIME_TRUNC(CURRENT_TIMESTAMP(), MINUTE), - INTERVAL 1 MINUTE)) AS timeInterval + period_start, + user_email + ORDER BY + period_start DESC ), - joined AS ( + time_series AS( + SELECT + * + FROM + UNNEST(generate_timestamp_array(DATETIME_TRUNC(TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY), MINUTE), + DATETIME_TRUNC(CURRENT_TIMESTAMP(), MINUTE), + INTERVAL 1 MINUTE)) AS timeInterval + ), + joined AS ( SELECT - user, - COALESCE(slotUsage, 0) as slotUsage, + COALESCE(src.slot_usage, 0) as slot_usage, + user_email, timeInterval FROM - src RIGHT OUTER JOIN timeSeries - ON creationTime = timeInterval) + src RIGHT OUTER JOIN time_series + ON period_start = timeInterval + ) SELECT - slotUsage, - user, - timeInterval + * FROM joined ORDER BY diff --git a/information_schema/slots_by_second.sql b/information_schema/slots_by_second.sql index dda80c3..3746e02 100644 --- a/information_schema/slots_by_second.sql +++ b/information_schema/slots_by_second.sql @@ -3,24 +3,24 @@ -- Change this value to change how far in the past the query will search DECLARE interval_in_days INT64 DEFAULT 7; -DECLARE time_period INT64; -SET time_period = (1000); -- Number of milliseconds in a second +DECLARE time_period INT64 DEFAULT (1000); -- Number of milliseconds in a second BEGIN WITH src AS ( SELECT - SAFE_DIVIDE(SUM(total_slot_ms), time_period) AS slotUsage, - DATETIME_TRUNC(creation_time, - SECOND) AS creationTime + SAFE_DIVIDE(SUM(period_slot_ms), time_period) AS slotUsage, -- Divide by 1 second (1000 ms) to convert to slots/second + period_start FROM - ``.``.INFORMATION_SCHEMA.JOBS_BY_PROJECT + ``.``.INFORMATION_SCHEMA.JOBS_TIMELINE WHERE - creation_time BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY) - AND CURRENT_TIMESTAMP() - AND total_slot_ms IS NOT NULL + period_start BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY) AND CURRENT_TIMESTAMP() + AND job_type <> 'SCRIPT' -- Exclude scripts since they pull in child process slots and skew results GROUP BY - creationTime), - timeSeries AS( + period_start + ORDER BY + period_start DESC + ), + timeSeries AS( SELECT * FROM @@ -34,7 +34,8 @@ BEGIN timeInterval FROM src RIGHT OUTER JOIN timeSeries - 
ON creationTime = timeInterval) + ON period_start = timeInterval + ) SELECT * From 275f99d5d60306a1cc8ff8e0d2afdb46ade45343 Mon Sep 17 00:00:00 2001 From: Sayle Matthews Date: Mon, 1 Jul 2024 18:03:15 -0500 Subject: [PATCH 25/52] Added a query to show all projects running jobs, their types, the regions they run in, and a count of the job types. Note this query will only exist for audit logs since INFORMATION_SCHEMA is region-specific so this is very difficult to do without utilizing the audit log sink. --- audit_log/jobs_in_regions.sql | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 audit_log/jobs_in_regions.sql diff --git a/audit_log/jobs_in_regions.sql b/audit_log/jobs_in_regions.sql new file mode 100644 index 0000000..87599a0 --- /dev/null +++ b/audit_log/jobs_in_regions.sql @@ -0,0 +1,30 @@ +/* + * This query will return back the project, location, job_type, and a counter of the number of jobs running in the combination + * of them for all projects included in the BQ audit log. + * + * This will assist in tracking down jobs running outside of expected regions or show an overview of where job types are + * are running throughout your organization. + */ + +-- Change this value to change how far in the past the query will search +DECLARE interval_in_days INT64 DEFAULT 7; + +SELECT + protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobName.projectId AS project_id, + protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobName.location AS location, + UPPER(REPLACE(protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.eventName, '_job_completed', '')) AS job_type, + COUNT(3) AS job_type_counter +FROM + `..cloudaudit_googleapis_com_data_access` +WHERE + protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobName.jobId IS NOT NULL + AND protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.job.jobName.jobId NOT LIKE 'script_job_%' -- filter BQ script child jobs + AND protopayload_auditlog.servicedata_v1_bigquery.jobCompletedEvent.eventName LIKE '%_job_completed' + AND protopayload_auditlog.authenticationInfo.principalEmail IS NOT NULL + AND protopayload_auditlog.authenticationInfo.principalEmail != "" + AND (timestamp) BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY) + AND CURRENT_TIMESTAMP() +GROUP BY + 1, + 2, + 3 \ No newline at end of file From 5b7073959f50d3c6aeea8ec36252b51bd0e56362 Mon Sep 17 00:00:00 2001 From: Sayle Matthews Date: Mon, 8 Jul 2024 15:27:29 -0500 Subject: [PATCH 26/52] Updated to also spit out a query the user can run to convert recommended datasets to physical storage. 
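The statement the new column emits for each recommended dataset has this general shape (the project and dataset names here are illustrative):

ALTER SCHEMA `my-project.my_dataset` SET OPTIONS(storage_billing_model = 'PHYSICAL');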
--- .../project_storage_model_recommendation.sql | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/information_schema/project_storage_model_recommendation.sql b/information_schema/project_storage_model_recommendation.sql index f76e24b..a03f915 100644 --- a/information_schema/project_storage_model_recommendation.sql +++ b/information_schema/project_storage_model_recommendation.sql @@ -34,11 +34,14 @@ DECLARE active_physical_price_per_gb NUMERIC DEFAULT 0.044; DECLARE long_term_physical_price_per_gb NUMERIC DEFAULT 0.022; */ +-- Do not modify this line +DECLARE query_template STRING DEFAULT "ALTER SCHEMA `` SET OPTIONS(storage_billing_model = 'PHYSICAL')"; + WITH storage AS ( SELECT DISTINCT tb.table_name, - tb.table_schema AS dataset, + CONCAT(tb.PROJECT_ID, '.', tb.table_schema) AS dataset, total_rows, total_partitions, @@ -171,6 +174,10 @@ SELECT IF(logical_storage_price < physical_storage_price, 'Keep dataset on logical storage', 'Change dataset to physical storage') AS recommendation, + -- Query to run + IF(logical_storage_price > physical_storage_price, + REPLACE(query_template, '', dataset), NULL) AS physical_storage_query, + -- If you wish to get the raw values that are not formatted uncomment the below line --final_data.* EXCEPT(dataset) FROM From c013f97408280dac1d5148d5e238000ab374f5f5 Mon Sep 17 00:00:00 2001 From: Sayle Matthews Date: Fri, 19 Jul 2024 13:08:45 -0500 Subject: [PATCH 27/52] Updated to pull the current storage billing model and factor this into the recommendations. Also revamped the verbiage around recommendations to take into account what the current billing model is. Note that in some cases if a customer switched to physical storage before around late August 2023 the INFORMATION_SCHEMA SCHEMATA view may not contain the update as Google did not backfill that data for customers that changed. See this bug in Google's bugtracker where I documented this: https://issuetracker.google.com/issues/296794707 --- .../project_storage_model_recommendation.sql | 120 ++++++++++++------ 1 file changed, 84 insertions(+), 36 deletions(-) diff --git a/information_schema/project_storage_model_recommendation.sql b/information_schema/project_storage_model_recommendation.sql index a03f915..4563535 100644 --- a/information_schema/project_storage_model_recommendation.sql +++ b/information_schema/project_storage_model_recommendation.sql @@ -1,23 +1,3 @@ -/* - * This query will look at a single project (by default in US multi-region) and - * calculate the logical and physical billing prices for each dataset inside of it - * then provide a recommendation on whether to keep it on logical storage - * or switch to the physical billing model. - * - * Physical (also called compressed) Storage will be GA and released for public - * consumption on July 5, 2023. - * - * It also includes inside of the storage CTE lots of extra values that can be used - * for other calculations that are being left in here to assist you as the customer - * make the best decision or to see additional information about your tables/datasets. - * - * Note it targets the US multi-region by default. If needing to change the region - * then change `region-us` below to whichever region the data exists in. Also uncomment - * the DECLARE values below for the EU region or if you are using a non-multi-region - * then refer here: https://cloud.google.com/bigquery/pricing#storage - * for the correct pricing and update accordingly. 
- */ - -- These values are for the US multi-region -- Comment these out and uncomment below if using the EU multi-region DECLARE active_logical_price_per_gb NUMERIC DEFAULT 0.02; @@ -34,12 +14,14 @@ DECLARE active_physical_price_per_gb NUMERIC DEFAULT 0.044; DECLARE long_term_physical_price_per_gb NUMERIC DEFAULT 0.022; */ --- Do not modify this line -DECLARE query_template STRING DEFAULT "ALTER SCHEMA `` SET OPTIONS(storage_billing_model = 'PHYSICAL')"; +-- Do not modify these two lines +DECLARE physical_storage_query_template STRING DEFAULT "ALTER SCHEMA `` SET OPTIONS(storage_billing_model = 'PHYSICAL')"; +DECLARE logical_storage_query_template STRING DEFAULT "ALTER SCHEMA `` SET OPTIONS(storage_billing_model = 'LOGICAL')"; WITH storage AS ( SELECT DISTINCT + tb.table_schema, tb.table_name, CONCAT(tb.PROJECT_ID, '.', tb.table_schema) AS dataset, total_rows, @@ -91,21 +73,51 @@ WITH storage AS (time_travel_physical_bytes/POW(1024, 3))*active_physical_price_per_gb AS time_travel_compressed_price, (fail_safe_physical_bytes/POW(1024, 3))*active_physical_price_per_gb AS fail_safe_compressed_price FROM - ``.``.INFORMATION_SCHEMA.TABLE_STORAGE AS tb + -- End user: Change to reflect your project and region + ``.``.INFORMATION_SCHEMA.TABLE_STORAGE AS tb -- Need to join on TABLES for existing tables to remove any temporary or job result tables -- Note due to this information being in the TABLE_STORAGE view this means it cannot be -- performed across an entire organization without checking the TABLES view in each project. - JOIN ``.``.INFORMATION_SCHEMA.TABLES AS t + -- End user: Change to reflect your project and region + JOIN ``.``.INFORMATION_SCHEMA.TABLES AS t ON t.table_catalog = tb.project_id AND t.table_name = tb.table_name WHERE tb.deleted = false ), +schemata_options AS +( + /* + * Extract the storage billing model + * Note that if it's not listed then it's logical, or if it was converted before ~August 26, 2023 + * then it might be physical, but Google did not backfill the schemata view showing the change. 
+  */
+  SELECT
+    schema_name,
+    option_value
+  FROM
+    -- End user: Change to reflect your project and region
+    ``.``.INFORMATION_SCHEMA.SCHEMATA_OPTIONS
+  WHERE
+    option_name = 'storage_billing_model'
+),
+storage_and_dataset_billing_type AS
+(
+  -- Gets project name and dataset name
+  SELECT
+    S.* EXCEPT(table_schema), -- Excluding table_schema to make grouping easier below and to remove redundant data
+    COALESCE(SO.option_value, 'LOGICAL') AS current_storage_model
+  FROM
+    storage AS S
+    LEFT OUTER JOIN schemata_options AS SO
+    ON S.table_schema = SO.schema_name
+),
 calculations AS
 (
   SELECT
     dataset,
+    current_storage_model,
     SUM(active_uncompressed_price) AS active_uncompressed_price,
     SUM(active_compressed_price) AS active_compressed_price,
     SUM(long_term_uncompressed_price) AS long_term_uncompressed_price,
@@ -113,9 +125,9 @@ calculations AS
     SUM(time_travel_compressed_price) AS time_travel_compressed_price,
     SUM(fail_safe_compressed_price) AS fail_safe_compressed_price
   FROM
-    storage
+    storage_and_dataset_billing_type
   GROUP BY
-    dataset
+    dataset, current_storage_model
 ),
 final_data AS
 (
@@ -141,6 +153,8 @@ final_data AS
       (time_travel_compressed_price+fail_safe_compressed_price)
     ) AS difference,
 
+    current_storage_model,
+
     active_uncompressed_price,
     active_compressed_price,
     long_term_uncompressed_price,
@@ -154,9 +168,12 @@ SELECT
 
   -- Logical prices and base (before adding in time travel and fail-safe reductions) physical price
   CONCAT('$ ',FORMAT("%'.2f", active_uncompressed_price)) AS logical_active_price,
-  CONCAT('$ ',FORMAT("%'.2f", active_compressed_price)) AS base_physical_price,
+  --CONCAT('$ ',FORMAT("%'.2f", active_compressed_price)) AS base_physical_price,
   CONCAT('$ ',FORMAT("%'.2f", long_term_uncompressed_price)) AS logical_long_term_price,
-  CONCAT('$ ',FORMAT("%'.2f", long_term_compressed_price)) AS base_long_term_price,
+  /* renamed both physical storage types to match the logical naming conventions and grouped them together */
+  CONCAT('$ ',FORMAT("%'.2f", active_compressed_price)) AS physical_active_price,
+  --CONCAT('$ ',FORMAT("%'.2f", long_term_compressed_price)) AS base_long_term_price,
+  CONCAT('$ ',FORMAT("%'.2f", long_term_compressed_price)) AS physical_long_term_price,
 
   -- Time travel and fail safe storage reductions
   CONCAT('$ ',FORMAT("%'.2f", additional_costs_for_physical_storage)) AS additional_costs_for_physical_storage,
@@ -168,18 +185,49 @@ SELECT
 
   -- Difference between logical storage and physical storage (logical - active)
   -- Note that a negative value means logica/uncompressed is cheaper
-  CONCAT('$ ',FORMAT("%'.2f", difference)) AS difference_in_price_if_physical_is_chosen,
-
-  -- Recommendation
-  IF(logical_storage_price < physical_storage_price,
-     'Keep dataset on logical storage', 'Change dataset to physical storage') AS recommendation,
+  --CONCAT('$ ',FORMAT("%'.2f", difference)) AS difference_in_price_if_physical_is_chosen,
+  CONCAT('$ ',FORMAT("%'.2f", difference)) AS savings_in_price_if_physical_is_chosen,
+
+  -- Saves or Costs More Money
+
+  -- Current storage model
+  current_storage_model,
+
+  -- Recommendation for what to do
+  /*
+   * Writing this in SQL makes it look complex, but it's relatively easy.
+   * If currently on logical and physical storage is cheaper, then recommend changing to physical storage. Otherwise recommend no change.
+   * If currently on physical and logical storage is cheaper, then recommend changing back to logical storage. Otherwise recommend no change.
+   */
+  CASE current_storage_model = 'LOGICAL'
+    WHEN logical_storage_price < physical_storage_price THEN
+      'Do nothing as logical storage is the best option (currently logical).'
+    WHEN logical_storage_price > physical_storage_price THEN
+      'Change dataset to physical storage for additional savings.'
+    ELSE
+      -- Is on physical storage currently
+      IF(logical_storage_price < physical_storage_price,
+        'Dataset is currently using physical storage and costing you more money than logical storage. Change dataset back to logical storage.',
+        'Do nothing as physical storage is the best option (currently physical).')
+  END AS recommendation,
 
   -- Query to run
-  IF(logical_storage_price > physical_storage_price,
-     REPLACE(query_template, '', dataset), NULL) AS physical_storage_query,
+  /*
+   * This again looks complex due to the SQL, but it uses the same logic as the statement above and emits SQL to make the change.
+   */
+  CASE current_storage_model = 'LOGICAL'
+    WHEN logical_storage_price > physical_storage_price THEN
+      REPLACE(physical_storage_query_template, '', dataset)
+    WHEN logical_storage_price < physical_storage_price THEN
+      NULL
+    ELSE
+      -- If on physical storage currently and needs to change emit the correct query
+      IF(logical_storage_price < physical_storage_price,
+        REPLACE(logical_storage_query_template, '', dataset), NULL)
+  END AS recommendation_change_SQL
 
   -- If you wish to get the raw values that are not formatted uncomment the below line
   --final_data.* EXCEPT(dataset)
   FROM
    final_data
-;
+;
\ No newline at end of file
From 12ed616840e2a52be42542db915be6c59475997e Mon Sep 17 00:00:00 2001
From: Sayle Matthews
Date: Fri, 19 Jul 2024 13:14:34 -0500
Subject: [PATCH 28/52] Updated with a note about why the billing storage
 model may be incorrect for early adopting customers.

---
 .../project_storage_model_recommendation.sql | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/information_schema/project_storage_model_recommendation.sql b/information_schema/project_storage_model_recommendation.sql
index 4563535..044cce6 100644
--- a/information_schema/project_storage_model_recommendation.sql
+++ b/information_schema/project_storage_model_recommendation.sql
@@ -191,6 +191,13 @@ SELECT
   -- Saves or Costs More Money
 
   -- Current storage model
+  /*
+   * Note this may be incorrect if you changed your billing model to physical prior to August 21, 2023.
+   * The reason is that Google didn't backfill the billing model to customers that were early adopters of the
+   * public version of physical storage.
+   *
+   * Bugtracker to see this: https://issuetracker.google.com/issues/296794707
+   */
   current_storage_model,
 
From bf146c6b30976996b78fda805bac8fe82bb80ded Mon Sep 17 00:00:00 2001
From: Sayle Matthews
Date: Wed, 7 Aug 2024 14:59:11 -0500
Subject: [PATCH 29/52] Updated logic and cleaned up SQL to be less confusing.
 The new logic now covers the scenario where a customer currently has
 physical storage and should stay on it because it is cheaper, and then
 shows a NULL value for the SQL code displayed with that recommendation.

--- .../project_storage_model_recommendation.sql | 55 +++++++++++--------
 1 file changed, 33 insertions(+), 22 deletions(-)

diff --git a/information_schema/project_storage_model_recommendation.sql b/information_schema/project_storage_model_recommendation.sql
index 044cce6..c3486c0 100644
--- a/information_schema/project_storage_model_recommendation.sql
+++ b/information_schema/project_storage_model_recommendation.sql
@@ -74,13 +74,13 @@ WITH storage AS
     (fail_safe_physical_bytes/POW(1024, 3))*active_physical_price_per_gb AS fail_safe_compressed_price
   FROM
     -- End user: Change to reflect your project and region
-    ``.``.INFORMATION_SCHEMA.TABLE_STORAGE AS tb
+    ``.``.INFORMATION_SCHEMA.TABLE_STORAGE AS tb
 
   -- Need to join on TABLES for existing tables to remove any temporary or job result tables
   -- Note due to this information being in the TABLE_STORAGE view this means it cannot be
   -- performed across an entire organization without checking the TABLES view in each project.
   -- End user: Change to reflect your project and region
-  JOIN ``.``.INFORMATION_SCHEMA.TABLES AS t
+  JOIN ``.``.INFORMATION_SCHEMA.TABLES AS t
     ON t.table_catalog = tb.project_id
     AND t.table_name = tb.table_name
   WHERE
@@ -98,7 +98,7 @@ schemata_options AS
     option_value
   FROM
     -- End user: Change to reflect your project and region
-    ``.``.INFORMATION_SCHEMA.SCHEMATA_OPTIONS
+    ``.``.INFORMATION_SCHEMA.SCHEMATA_OPTIONS
   WHERE
     option_name = 'storage_billing_model'
 ),
@@ -202,36 +202,47 @@ SELECT
 
   -- Recommendation for what to do
   /*
-   * Writing this in SQL makes it look complex, but it's relatively easy.
+   * Writing this in SQL makes it look more complex than it is, but it's relatively easy.
    * If currently on logical and physical storage is cheaper, then recommend changing to physical storage. Otherwise recommend no change.
    * If currently on physical and logical storage is cheaper, then recommend changing back to logical storage. Otherwise recommend no change.
    */
-  CASE current_storage_model = 'LOGICAL'
-    WHEN logical_storage_price < physical_storage_price THEN
-      'Do nothing as logical storage is the best option (currently logical).'
-    WHEN logical_storage_price > physical_storage_price THEN
+  IF(
+    current_storage_model = 'LOGICAL',
+    -- Is on logical storage currently
+    IF(
+      logical_storage_price < physical_storage_price,
+      'Do nothing as logical storage is the best option (currently logical).',
       'Change dataset to physical storage for additional savings.'
-    ELSE
+    ),
     -- Is on physical storage currently
-      IF(logical_storage_price < physical_storage_price,
-        'Dataset is currently using physical storage and costing you more money than logical storage. Change dataset back to logical storage.',
-        'Do nothing as physical storage is the best option (currently physical).')
-  END AS recommendation,
+    IF(logical_storage_price < physical_storage_price,
+      'Dataset is currently using physical storage and costing you more money than logical storage. Change dataset back to logical storage.',
+      'Do nothing as physical storage is the best option (currently physical).')
+  ) AS recommendation,
 
   -- Query to run
   /*
   * This again looks complex due to the SQL, but it uses the same logic as the statement above and emits SQL to make the change.
*/ - CASE current_storage_model = 'LOGICAL' - WHEN logical_storage_price > physical_storage_price THEN + IF( + current_storage_model = 'LOGICAL', + -- Is on logical storage currently + IF( + logical_storage_price < physical_storage_price, + -- Do nothing + NULL, + -- Use the change to physical storage query REPLACE(physical_storage_query_template, '', dataset) - WHEN logical_storage_price < physical_storage_price THEN - NULL - ELSE - -- If on physical storage currently and needs to change emit the correct query - IF(logical_storage_price < physical_storage_price, - REPLACE(logical_storage_query_template, '', dataset), NULL) - END AS recommendation_change_SQL + ), + -- Is on physical storage currently + IF( + logical_storage_price < physical_storage_price, + -- Use the change to logical storage query + REPLACE(logical_storage_query_template, '', dataset), + -- Do nothing + NULL + ) + ) AS recommendation_change_SQL -- If you wish to get the raw values that are not formatted uncomment the below line --final_data.* EXCEPT(dataset) From a7ccf0313ddae5c9f21f3b880b00ca18683b7946 Mon Sep 17 00:00:00 2001 From: Sayle Matthews <61764166+sayle-doit@users.noreply.github.com> Date: Wed, 11 Sep 2024 10:54:42 -0500 Subject: [PATCH 30/52] Update load_slots_per_minute.sql Fixed a bug on the job_type value. --- information_schema/load_slots_per_minute.sql | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/information_schema/load_slots_per_minute.sql b/information_schema/load_slots_per_minute.sql index 291d068..74636af 100644 --- a/information_schema/load_slots_per_minute.sql +++ b/information_schema/load_slots_per_minute.sql @@ -14,7 +14,7 @@ BEGIN ``.``.INFORMATION_SCHEMA.JOBS_TIMELINE WHERE period_start BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY) AND CURRENT_TIMESTAMP() - AND job_type = 'MINUTE' + AND job_type = 'LOAD' GROUP BY period_start ORDER BY @@ -43,4 +43,4 @@ FROM joined ORDER BY timeInterval ASC; -END \ No newline at end of file +END From 3091ecbfb1eaea800c4a552dff13d9ae095add85 Mon Sep 17 00:00:00 2001 From: Sayle Matthews <61764166+sayle-doit@users.noreply.github.com> Date: Wed, 30 Oct 2024 09:53:29 -0500 Subject: [PATCH 31/52] Update billing_model_cost_estimation.sql Added a note that this file is deprecated and put a link to the new file to not break any old links to this script. --- .../billing_model_cost_estimation.sql | 66 ++----------------- 1 file changed, 4 insertions(+), 62 deletions(-) diff --git a/information_schema/billing_model_cost_estimation.sql b/information_schema/billing_model_cost_estimation.sql index c382a5e..057259d 100644 --- a/information_schema/billing_model_cost_estimation.sql +++ b/information_schema/billing_model_cost_estimation.sql @@ -1,63 +1,5 @@ /* -Query that calculates both logical and physical storage billing costs -along with compression ratio (>2.0 means cost savings in physical -billing model). Does not take into account 10gb free storage per month -given that is weighted equally between logical/physical so it cancels -out when it comes to considering which provides the most savings. - -All numbers are grouped by dataset and considered estimates only. 
-*/ -DECLARE active_logical_gb_price FLOAT64 DEFAULT 0.023; -DECLARE long_term_logical_gb_price FLOAT64 DEFAULT 0.016; -DECLARE active_physical_gb_price FLOAT64 DEFAULT 0.04; -DECLARE long_term_physical_gb_price FLOAT64 DEFAULT 0.02; - -with storage_sizes as -( - select - a.project_id - ,a.table_schema - ,SUM(a.active_logical_bytes) / power(1000, 3) AS active_logical_gb - ,SUM(a.long_term_logical_bytes) / power(1000, 3) AS long_term_logical_gb - ,SUM(a.active_physical_bytes) / power(1000, 3) AS active_physical_gb - ,SUM(a.long_term_physical_bytes) / power(1000, 3) AS long_term_physical_gb - ,SUM(a.total_physical_bytes) / power(1000, 3) AS total_physical_gb - ,SUM(a.total_logical_bytes) / power(1000, 3) AS total_logical_gb - from - ``.``.INFORMATION_SCHEMA.TABLE_STORAGE_BY_PROJECT a - join ``.``.INFORMATION_SCHEMA.TABLES b - on a.project_id = b.table_catalog - and a.table_schema = b.table_schema - and a.table_name = b.table_name - WHERE a.total_logical_bytes > 0 - AND a.total_physical_bytes > 0 - GROUP BY a.project_id, a.table_schema -) -,cost_calcs as -( - select - project_id - ,table_schema - ,active_logical_gb - ,active_logical_gb*active_logical_gb_price as active_logical_cost - ,long_term_logical_gb - ,long_term_logical_gb*long_term_logical_gb_price as long_term_logical_cost - ,active_physical_gb - ,active_physical_gb*active_physical_gb_price as active_physical_cost - ,long_term_physical_gb - ,long_term_physical_gb*long_term_physical_gb_price as long_term_physical_cost - ,total_logical_gb / total_physical_gb AS compression_ratio - from storage_sizes -) -select - project_id - ,table_schema - ,active_logical_gb - ,long_term_logical_gb - ,(active_logical_cost+long_term_logical_cost) as total_logical_cost - ,active_physical_gb - ,long_term_physical_gb - ,(active_physical_cost+long_term_physical_cost) as total_physical_cost - ,compression_ratio -from -cost_calcs; + * This file has been deprecated, but has links to it all over the internet. 
+ * Please use this SQL file instead: + * https://github.com/doitintl/bigquery-optimization-queries/blob/main/information_schema/project_storage_model_recommendation.sql + */ From 0a1557c040ff7bb9f0723b6ea881065eedcfaeea Mon Sep 17 00:00:00 2001 From: Mike Royle Date: Mon, 9 Jun 2025 14:17:50 -0700 Subject: [PATCH 32/52] took the billing_recommendation_per_query query and modified it to look across all queries for the last 30 days and make a recommendation --- .../total_billing_recommendation.sql | 123 ++++++++++++++++++ 1 file changed, 123 insertions(+) create mode 100644 information_schema/total_billing_recommendation.sql diff --git a/information_schema/total_billing_recommendation.sql b/information_schema/total_billing_recommendation.sql new file mode 100644 index 0000000..03dab44 --- /dev/null +++ b/information_schema/total_billing_recommendation.sql @@ -0,0 +1,123 @@ +-- This query aggregates costs across all queries in the specified timeframe and compares +-- on-demand pricing versus different BigQuery Editions pricing models + +-- Change this value to change how far in the past the query will search +DECLARE interval_in_days INT64 DEFAULT 30; + +BEGIN +WITH + src AS ( + SELECT + ROUND(SAFE_DIVIDE(total_slot_ms, + TIMESTAMP_DIFF(end_time, start_time, MILLISECOND)), 2) AS approximateSlotCount, + job_type, + query, + project_id AS projectId, + start_time AS startTime, + end_time AS endTime, + ROUND(COALESCE(total_bytes_billed, + 0), 2) AS totalBytesBilled, + ROUND(COALESCE(total_bytes_billed, + 0) / POW(1024, 2), 2) AS totalMegabytesBilled, + ROUND(COALESCE(total_bytes_billed, + 0) / POW(1024, 3), 2) AS totalGigabytesBilled, + ROUND(COALESCE(total_bytes_billed, + 0) / POW(1024, 4), 2) AS totalTerabytesBilled, + TIMESTAMP_DIFF(end_time, start_time, MILLISECOND) AS executionTimeMs + FROM + ``.``.INFORMATION_SCHEMA.JOBS_BY_PROJECT + WHERE + creation_time BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY) + AND CURRENT_TIMESTAMP() + ), + rounded AS ( + SELECT + *, + -- Rounds up to the nearest 50 slots (autoscaler increments) + floor((CEIL(approximateSlotCount) + 49) / 50) * 50 AS roundedUpSlots, + + -- If query ran in under 1 minute (60 seconds * 1000 ms) then round up to 1 minute + IF(executionTimeMs < 1000*60, 1000*60, executionTimeMs) AS billedDurationMs, + -- Calculates the duration in hours for calculating slot/hours used + -- Formula: (Execution Time in ms)/(1000 ms * 60 seconds * 60 minutes) + IF(executionTimeMs < 1000*60, 1000*60, executionTimeMs)/(1000*60*60) AS billedDurationHour, + + -- Apply minimum 10 MiB billing per query + GREATEST(totalBytesBilled, 10 * POW(1024, 2)) AS billedBytes, + GREATEST(totalMegabytesBilled, 10) AS billedMegabytes, + GREATEST(totalGigabytesBilled, 10/1024) AS billedGigabytes, + GREATEST(totalTerabytesBilled, 10/POW(1024, 2)) AS billedTerabytes + FROM src + ), + costs AS ( + SELECT + *, + SAFE_DIVIDE(billedBytes, + POW(1024, 4)) * 6.25 AS onDemandCost, + billedDurationHour * 0.04 AS standardEditionCost, + billedDurationHour * 0.06 AS enterpriseEditionCost, + billedDurationHour * 0.048 AS enterpriseEdition1YearCost, + billedDurationHour * 0.036 AS enterpriseEdition3YearCost, + billedDurationHour * 0.1 AS enterprisePlusEditionCost, + billedDurationHour * 0.08 AS enterprisePlusEdition1YearCost, + billedDurationHour * 0.06 AS enterprisePlusEdition3YearCost + FROM + rounded + ) + +-- Aggregate costs across all queries +SELECT + -- Time period information + CONCAT('Last ', interval_in_days, ' days (', + FORMAT_TIMESTAMP('%Y-%m-%d', 
TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY)), + ' to ', + FORMAT_TIMESTAMP('%Y-%m-%d', CURRENT_TIMESTAMP()), + ')') AS time_period, + + -- Query count + COUNT(DISTINCT query) AS total_unique_queries, + COUNT(*) AS total_query_executions, + + -- Total bytes processed + SUM(billedBytes) AS total_bytes_billed, + ROUND(SUM(billedBytes) / POW(1024, 4), 2) AS total_terabytes_billed, + + -- Total slot hours + SUM(billedDurationHour) AS total_billed_slot_hours, + + -- Aggregated costs + ROUND(SUM(onDemandCost), 2) AS total_on_demand_cost, + ROUND(SUM(standardEditionCost), 2) AS total_standard_edition_cost, + ROUND(SUM(enterpriseEditionCost), 2) AS total_enterprise_edition_cost, + ROUND(SUM(enterpriseEdition1YearCost), 2) AS total_enterprise_1year_cost, + ROUND(SUM(enterpriseEdition3YearCost), 2) AS total_enterprise_3year_cost, + ROUND(SUM(enterprisePlusEditionCost), 2) AS total_enterprise_plus_cost, + ROUND(SUM(enterprisePlusEdition1YearCost), 2) AS total_enterprise_plus_1year_cost, + ROUND(SUM(enterprisePlusEdition3YearCost), 2) AS total_enterprise_plus_3year_cost, + + -- Cost comparisons (positive means on-demand is more expensive) + ROUND(SUM(onDemandCost) - SUM(standardEditionCost), 2) AS on_demand_vs_standard_diff, + ROUND(SUM(onDemandCost) - SUM(enterpriseEditionCost), 2) AS on_demand_vs_enterprise_diff, + ROUND(SUM(onDemandCost) - SUM(enterpriseEdition1YearCost), 2) AS on_demand_vs_enterprise_1year_diff, + ROUND(SUM(onDemandCost) - SUM(enterpriseEdition3YearCost), 2) AS on_demand_vs_enterprise_3year_diff, + + -- Cost savings percentages + ROUND(100 * (SUM(onDemandCost) - SUM(standardEditionCost)) / NULLIF(SUM(onDemandCost), 0), 2) AS standard_edition_savings_pct, + ROUND(100 * (SUM(onDemandCost) - SUM(enterpriseEditionCost)) / NULLIF(SUM(onDemandCost), 0), 2) AS enterprise_edition_savings_pct, + ROUND(100 * (SUM(onDemandCost) - SUM(enterpriseEdition1YearCost)) / NULLIF(SUM(onDemandCost), 0), 2) AS enterprise_1year_savings_pct, + ROUND(100 * (SUM(onDemandCost) - SUM(enterpriseEdition3YearCost)) / NULLIF(SUM(onDemandCost), 0), 2) AS enterprise_3year_savings_pct, + + -- Overall recommendation + CASE + WHEN SUM(onDemandCost) < SUM(standardEditionCost) THEN 'On-demand pricing recommended' + WHEN SUM(enterpriseEdition3YearCost) < SUM(standardEditionCost) AND + SUM(enterpriseEdition3YearCost) < SUM(enterpriseEdition1YearCost) AND + SUM(enterpriseEdition3YearCost) < SUM(enterpriseEditionCost) THEN 'Enterprise Edition with 3-year commitment recommended' + WHEN SUM(enterpriseEdition1YearCost) < SUM(standardEditionCost) AND + SUM(enterpriseEdition1YearCost) < SUM(enterpriseEditionCost) THEN 'Enterprise Edition with 1-year commitment recommended' + WHEN SUM(enterpriseEditionCost) < SUM(standardEditionCost) THEN 'Enterprise Edition recommended' + ELSE 'Standard Edition recommended' + END AS overall_recommendation +FROM + costs; +END From 5921a6f78fe71bf281b39333ef002292dd5081d2 Mon Sep 17 00:00:00 2001 From: Mike Royle Date: Mon, 9 Jun 2025 15:28:42 -0700 Subject: [PATCH 33/52] updated the query to actually multiply by number of slots when calculating the slot-hours --- .../total_billing_recommendation.sql | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/information_schema/total_billing_recommendation.sql b/information_schema/total_billing_recommendation.sql index 03dab44..0a3ddb6 100644 --- a/information_schema/total_billing_recommendation.sql +++ b/information_schema/total_billing_recommendation.sql @@ -54,13 +54,14 @@ WITH *, 
SAFE_DIVIDE(billedBytes, POW(1024, 4)) * 6.25 AS onDemandCost, - billedDurationHour * 0.04 AS standardEditionCost, - billedDurationHour * 0.06 AS enterpriseEditionCost, - billedDurationHour * 0.048 AS enterpriseEdition1YearCost, - billedDurationHour * 0.036 AS enterpriseEdition3YearCost, - billedDurationHour * 0.1 AS enterprisePlusEditionCost, - billedDurationHour * 0.08 AS enterprisePlusEdition1YearCost, - billedDurationHour * 0.06 AS enterprisePlusEdition3YearCost + -- Multiply by roundedUpSlots to correctly calculate slot-hours cost + roundedUpSlots * billedDurationHour * 0.04 AS standardEditionCost, + roundedUpSlots * billedDurationHour * 0.06 AS enterpriseEditionCost, + roundedUpSlots * billedDurationHour * 0.048 AS enterpriseEdition1YearCost, + roundedUpSlots * billedDurationHour * 0.036 AS enterpriseEdition3YearCost, + roundedUpSlots * billedDurationHour * 0.1 AS enterprisePlusEditionCost, + roundedUpSlots * billedDurationHour * 0.08 AS enterprisePlusEdition1YearCost, + roundedUpSlots * billedDurationHour * 0.06 AS enterprisePlusEdition3YearCost FROM rounded ) From 76c010a37daae028f4b1d83f1ada7ebd435e0232 Mon Sep 17 00:00:00 2001 From: Mike Royle Date: Tue, 10 Jun 2025 13:10:48 -0700 Subject: [PATCH 34/52] took the billing_recommendation_per_query query and stripped out the recommendations so we can just get the proper billing data --- information_schema/costs_by_query.sql | 112 ++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) create mode 100644 information_schema/costs_by_query.sql diff --git a/information_schema/costs_by_query.sql b/information_schema/costs_by_query.sql new file mode 100644 index 0000000..c640175 --- /dev/null +++ b/information_schema/costs_by_query.sql @@ -0,0 +1,112 @@ +-- This query analyzes BigQuery usage over a specified timeframe +-- focusing on slot usage and data processed without edition recommendations + +-- Change this value to change how far in the past the query will search +DECLARE interval_in_days INT64 DEFAULT 7; + +BEGIN +WITH + src AS ( + SELECT + ROUND(SAFE_DIVIDE(total_slot_ms, + TIMESTAMP_DIFF(end_time, start_time, MILLISECOND)), 2) AS approximateSlotCount, + job_type, + query, + project_id AS projectId, + start_time AS startTime, + end_time AS endTime, + ROUND(COALESCE(total_bytes_billed, + 0), 2) AS totalBytesBilled, + ROUND(COALESCE(total_bytes_billed, + 0) / POW(1024, 2), 2) AS totalMegabytesBilled, + ROUND(COALESCE(total_bytes_billed, + 0) / POW(1024, 3), 2) AS totalGigabytesBilled, + ROUND(COALESCE(total_bytes_billed, + 0) / POW(1024, 4), 2) AS totalTerabytesBilled, + TIMESTAMP_DIFF(end_time, start_time, MILLISECOND) AS executionTimeMs + FROM + ``.``.INFORMATION_SCHEMA.JOBS_BY_PROJECT + WHERE + creation_time BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY) + AND CURRENT_TIMESTAMP() + AND total_slot_ms IS NOT NULL + AND state = "DONE" + ORDER BY + approximateSlotCount DESC + ), + rounded AS ( + SELECT + *, + -- Rounds up to the nearest 50 slots (autoscaler increments) + floor((CEIL(approximateSlotCount) + 49) / 50) * 50 AS roundedUpSlots, + + -- If query ran in under 1 minute (60 seconds * 1000 ms) then round up to 1 minute + IF(executionTimeMs < 1000*60, 1000*60, executionTimeMs) AS billedDurationMs, + -- Calculates the duration in hours for calculating slot/hours used + -- Formula: (Execution Time in ms)/(1000 ms * 60 seconds * 60 minutes) + IF(executionTimeMs < 1000*60, 1000*60, executionTimeMs)/(1000*60*60) AS billedDurationHour, + + -- Apply minimum 10 MiB billing per query + 
GREATEST(totalBytesBilled, 10 * POW(1024, 2)) AS billedBytes,
+      GREATEST(totalMegabytesBilled, 10) AS billedMegabytes,
+      GREATEST(totalGigabytesBilled, 10/1024) AS billedGigabytes,
+      GREATEST(totalTerabytesBilled, 10/POW(1024, 2)) AS billedTerabytes
+    FROM src
+  ),
+  queryCounts AS (
+    SELECT
+      query,
+      COUNT(query) AS queryCount
+    FROM
+      src
+    GROUP BY
+      query
+  ),
+  queryMetrics AS (
+    SELECT
+      rounded.query,
+      projectId,
+      startTime,
+      endTime,
+      billedDurationHour,
+      billedBytes,
+      billedMegabytes,
+      billedGigabytes,
+      billedTerabytes,
+      approximateSlotCount,
+      roundedUpSlots,
+      -- Calculate slot-hours for this query
+      roundedUpSlots * billedDurationHour AS slotHours,
+      -- Calculate on-demand cost
+      SAFE_DIVIDE(billedBytes, POW(1024, 4)) * 6.25 AS onDemandCost,
+      queryCount
+    FROM
+      rounded
+    JOIN
+      queryCounts
+    ON
+      rounded.query = queryCounts.query
+  )
+
+-- Final output with query metrics
+SELECT
+  projectId,
+  query,
+  startTime,
+  endTime,
+  billedDurationHour AS queryDurationHours,
+  approximateSlotCount,
+  roundedUpSlots AS billedSlotCount,
+  slotHours,
+  onDemandCost,
+  queryCount AS executionCount,
+  billedBytes,
+  billedMegabytes,
+  billedGigabytes,
+  billedTerabytes
+FROM
+  queryMetrics
+ORDER BY
+  onDemandCost DESC,
+  slotHours DESC;
+END

From a72724351560cf4f80f779a48d60dedd9dda6cc5 Mon Sep 17 00:00:00 2001
From: Sayle Matthews <61764166+sayle-doit@users.noreply.github.com>
Date: Tue, 29 Jul 2025 12:06:57 -0500
Subject: [PATCH 35/52] Update billing_recommendation_per_query.sql

Updated to account for the autoscaler scaling up in increments of 50
instead of 100.
---
 information_schema/billing_recommendation_per_query.sql | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/information_schema/billing_recommendation_per_query.sql b/information_schema/billing_recommendation_per_query.sql
index 580a55a..abc2938 100644
--- a/information_schema/billing_recommendation_per_query.sql
+++ b/information_schema/billing_recommendation_per_query.sql
@@ -44,8 +44,8 @@ WITH
   rounded AS (
   SELECT
     *,
-    -- Rounds up to the nearest 100 slots
-    floor((CEIL(approximateSlotCount) + 99) / 100) * 100 AS roundedUpSlots,
+    -- Rounds up to the nearest 50 slots for the autoscaler
+    CEIL(approximateSlotCount / 50) * 50 AS roundedUpSlots,

     -- If query ran in under 1 minute (60 seconds * 1000 ms) then round up to 1 minute
     IF(executionTimeMs < 1000*60, 1000*60, executionTimeMs) AS billedDurationMs,

From 31ca47dfc8f218e259312bafd495aa3f275b8a18 Mon Sep 17 00:00:00 2001
From: Sayle Matthews <61764166+sayle-doit@users.noreply.github.com>
Date: Tue, 29 Jul 2025 16:41:25 -0500
Subject: [PATCH 36/52] Update project_storage_model_recommendation.sql

Added in a location to the query suggestion so it runs in the correct
region all of the time.
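For context, the full suggestion this produces is a two-statement script.
Illustrative only (the dataset name and region below are assumed examples,
not output of this change):

    SET @@location = 'us-central1';
    ALTER SCHEMA `my_dataset` SET OPTIONS(storage_billing_model = 'PHYSICAL');

Pinning @@location first keeps the ALTER SCHEMA statement from running in
the wrong region and failing to find the dataset.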
--- .../project_storage_model_recommendation.sql | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/information_schema/project_storage_model_recommendation.sql b/information_schema/project_storage_model_recommendation.sql index c3486c0..94478c5 100644 --- a/information_schema/project_storage_model_recommendation.sql +++ b/information_schema/project_storage_model_recommendation.sql @@ -15,6 +15,7 @@ DECLARE long_term_physical_price_per_gb NUMERIC DEFAULT 0.022; */ -- Do not modify these two lines +DECLARE location_query_template STRING DEFAULT "SET @@location = ';\n"; DECLARE physical_storage_query_template STRING DEFAULT "ALTER SCHEMA `` SET OPTIONS(storage_billing_model = 'PHYSICAL')"; DECLARE logical_storage_query_template STRING DEFAULT "ALTER SCHEMA `` SET OPTIONS(storage_billing_model = 'LOGICAL')"; @@ -231,14 +232,21 @@ SELECT logical_storage_price < physical_storage_price, -- Do nothing NULL, - -- Use the change to physical storage query - REPLACE(physical_storage_query_template, '', dataset) + CONCAT( + -- Add in the location + REPLACE(location_query_template, 'region-', ''), + -- Use the change to physical storage query + REPLACE(physical_storage_query_template, '', dataset)) ), -- Is on physical storage currently IF( logical_storage_price < physical_storage_price, - -- Use the change to logical storage query - REPLACE(logical_storage_query_template, '', dataset), + CONCAT( + -- Add in the location + REPLACE(location_query_template, 'region-', ''), + -- Use the change to logical storage query + REPLACE(logical_storage_query_template, '', dataset)) + , -- Do nothing NULL ) @@ -248,4 +256,4 @@ SELECT --final_data.* EXCEPT(dataset) FROM final_data -; \ No newline at end of file +; From f25ac55a6baca1b8be6bfa934fa8c6f915aa6acd Mon Sep 17 00:00:00 2001 From: Sayle Matthews <61764166+sayle-doit@users.noreply.github.com> Date: Mon, 11 Aug 2025 16:18:46 -0500 Subject: [PATCH 37/52] Update project_storage_model_recommendation.sql Added in a missing quote. --- information_schema/project_storage_model_recommendation.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/information_schema/project_storage_model_recommendation.sql b/information_schema/project_storage_model_recommendation.sql index 94478c5..c32035f 100644 --- a/information_schema/project_storage_model_recommendation.sql +++ b/information_schema/project_storage_model_recommendation.sql @@ -15,7 +15,7 @@ DECLARE long_term_physical_price_per_gb NUMERIC DEFAULT 0.022; */ -- Do not modify these two lines -DECLARE location_query_template STRING DEFAULT "SET @@location = ';\n"; +DECLARE location_query_template STRING DEFAULT "SET @@location = '';\n"; DECLARE physical_storage_query_template STRING DEFAULT "ALTER SCHEMA `` SET OPTIONS(storage_billing_model = 'PHYSICAL')"; DECLARE logical_storage_query_template STRING DEFAULT "ALTER SCHEMA `` SET OPTIONS(storage_billing_model = 'LOGICAL')"; From 19dac7cf3bd9f219ae9a4151383412b78f7d428c Mon Sep 17 00:00:00 2001 From: Sayle Matthews Date: Wed, 17 Sep 2025 10:37:39 -0500 Subject: [PATCH 38/52] Added autoscale_usage_and_costs_per_second.sql query which provides per-second autoscale usage and cost data from the INFORMATION_SCHEMA.RESERVATIONS_TIMELINE view. 
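The cost columns in this query convert the published per-slot-hour edition
price into a per-slot-second price and multiply it by the slots observed in
that second. A minimal sketch of the arithmetic (assumed values: Enterprise
at $0.06 per slot-hour, 100 autoscaled slots held for one full second):

    -- (price per slot-hour / 3600 seconds per hour) * slots used in that second
    SELECT (0.06 / 3600) * 100 AS cost_for_that_second;  -- about $0.00167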
---
 .../autoscale_usage_and_costs_per_second.sql  | 76 +++++++++++++++++++
 1 file changed, 76 insertions(+)
 create mode 100644 information_schema/autoscale_usage_and_costs_per_second.sql

diff --git a/information_schema/autoscale_usage_and_costs_per_second.sql b/information_schema/autoscale_usage_and_costs_per_second.sql
new file mode 100644
index 0000000..b04127d
--- /dev/null
+++ b/information_schema/autoscale_usage_and_costs_per_second.sql
@@ -0,0 +1,76 @@
+/*
+ * This query retrieves a detailed, per-second timeline of how slots were utilized and assigned, helping you monitor and understand your BigQuery reservation performance.
+ * It is useful for observing the autoscaler behavior on a per-reservation basis.
+
+ * Instructions for use:
+ * 1. Modify the project and region placeholders below to match your required values.
+ * 2. Change the interval_in_days value to travel back further in time. By default this is 7 days.
+
+ * Note: If not using the US or EU multi-regions, then the costs may be different.
+ * Change the standard_edition_cost, enterprise_edition_cost, and enterprise_plus_edition_cost values below to match the actual cost listed here:
+ * https://cloud.google.com/bigquery/pricing?hl=en#:~:text=size%20calculation.-,Capacity%20compute%20pricing,-BigQuery%20offers%20a
+ */
+
+-- Modify this to go further back in time
+DECLARE interval_in_days INT64 DEFAULT 7;
+
+-- Modify these values if not using the US or EU multi-regions
+-- Values can be found here: https://cloud.google.com/bigquery/pricing?hl=en#:~:text=size%20calculation.-,Capacity%20compute%20pricing,-BigQuery%20offers%20a
+DECLARE standard_edition_cost NUMERIC DEFAULT 0.04;
+DECLARE enterprise_edition_cost NUMERIC DEFAULT 0.06;
+DECLARE enterprise_plus_edition_cost NUMERIC DEFAULT 0.10;
+
+WITH
+  slot_data AS (
+    SELECT
+      -- Start time (aggregated per-second)
+      details.start_time,
+      reservation_name,
+      edition,
+      -- Number of slots added to the reservation by autoscaling at this second.
+      details.autoscale_current_slots,
+      ((CASE edition
+          WHEN 'STANDARD' THEN standard_edition_cost
+          WHEN 'ENTERPRISE' THEN enterprise_edition_cost
+          WHEN 'ENTERPRISE PLUS' THEN enterprise_plus_edition_cost
+        END
+      )/3600) * details.autoscale_current_slots AS autoscaled_cost,
+      -- Maximum number of slots that could be added to the reservation by autoscaling at this second.
+      details.autoscale_max_slots,
+      -- Number of slots assigned to this reservation at this second. It equals the baseline slot capacity of a reservation.
+      details.slots_assigned,
+      ((CASE edition
+          WHEN 'STANDARD' THEN standard_edition_cost
+          WHEN 'ENTERPRISE' THEN enterprise_edition_cost
+          WHEN 'ENTERPRISE PLUS' THEN enterprise_plus_edition_cost
+        END
+      )/3600) * details.slots_assigned AS slots_assigned_cost,
+      -- Maximum slot capacity for this reservation, including slot sharing at this second.
+      -- If ignore_idle_slots is true, this field is the same as slots_assigned.
+      -- Otherwise, the slots_max_assigned field is the total number of slots in all capacity commitments in the administration project.
+      details.slots_max_assigned
+    FROM
+      -- Schema reference is here: https://cloud.google.com/bigquery/docs/information-schema-reservation-timeline#schema
+      -- Change the project and region placeholders below to match your required values
+      ``.``.INFORMATION_SCHEMA.RESERVATIONS_TIMELINE,
+      -- The per_second_details is an array of structs, so needs to be unnested and joined versus rest of the data
+      -- The full row is per minute, and the details are per second within that minute, thus the need for an array in this column
+      UNNEST(per_second_details) AS details
+    WHERE
+      period_start BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY)
+        AND CURRENT_TIMESTAMP()
+)
+
+SELECT
+  start_time,
+  reservation_name,
+  autoscale_current_slots,
+  autoscaled_cost,
+  slots_assigned, slots_assigned_cost,
+  slots_max_assigned,
+  autoscaled_cost + slots_assigned_cost AS total_slots_cost,
+  CONCAT('$ ',FORMAT("%'.2f", autoscaled_cost + slots_assigned_cost)) AS total_slots_cost_formatted
+FROM
+  slot_data
+ORDER BY
+  start_time DESC
\ No newline at end of file

From 79e672d1ffe9889b5042972ac7590740a10b4fb8 Mon Sep 17 00:00:00 2001
From: Sayle Matthews <61764166+sayle-doit@users.noreply.github.com>
Date: Wed, 17 Sep 2025 16:46:04 -0500
Subject: [PATCH 39/52] Update top_billed_queries_deduplicated.sql

Fixed up the unit names to correctly specify what they are.
---
 information_schema/top_billed_queries_deduplicated.sql | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/information_schema/top_billed_queries_deduplicated.sql b/information_schema/top_billed_queries_deduplicated.sql
index 89c9a6e..24e0340 100644
--- a/information_schema/top_billed_queries_deduplicated.sql
+++ b/information_schema/top_billed_queries_deduplicated.sql
@@ -18,11 +18,11 @@ BEGIN
       ROUND(COALESCE(total_bytes_billed,
         0), 2) AS totalBytesBilled,
       ROUND(COALESCE(total_bytes_billed,
-        0) / POW(1024, 2), 2) AS totalMegabytesBilled,
+        0) / POW(1024, 2), 2) AS totalMebibytesBilled,
       ROUND(COALESCE(total_bytes_billed,
-        0) / POW(1024, 3), 2) AS totalGigabytesBilled,
+        0) / POW(1024, 3), 2) AS totalGibibytesBilled,
       ROUND(COALESCE(total_bytes_billed,
-        0) / POW(1024, 4), 2) AS totalTerabytesBilled,
+        0) / POW(1024, 4), 2) AS totalTebibytesBilled,
       ROUND(SAFE_DIVIDE(total_bytes_billed,
         POW(1024, 4)) * 6.25, 2) AS on_demand_cost,
       ROUND(SAFE_DIVIDE(total_slot_ms,

From 3741677155c5bca08c4679909b2b02e834a2189f Mon Sep 17 00:00:00 2001
From: Sayle Matthews <61764166+sayle-doit@users.noreply.github.com>
Date: Mon, 22 Sep 2025 12:00:53 -0500
Subject: [PATCH 40/52] Update autoscale_usage_and_costs_per_second.sql

Added the autoscale max slots and a ratio of used autoscale usage.
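One caveat worth noting: a reservation with autoscaling disabled can report
autoscale_max_slots = 0, and a plain division then fails with a
divide-by-zero error. If that can happen on your reservations, SAFE_DIVIDE
returns NULL instead of erroring (sketch with assumed literal values, not
part of this change):

    SELECT SAFE_DIVIDE(autoscale_current_slots, autoscale_max_slots) AS autoscale_usage_ratio
    FROM UNNEST([STRUCT(50 AS autoscale_current_slots, 0 AS autoscale_max_slots)]);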
---
 information_schema/autoscale_usage_and_costs_per_second.sql | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/information_schema/autoscale_usage_and_costs_per_second.sql b/information_schema/autoscale_usage_and_costs_per_second.sql
index b04127d..bffb582 100644
--- a/information_schema/autoscale_usage_and_costs_per_second.sql
+++ b/information_schema/autoscale_usage_and_costs_per_second.sql
@@ -65,6 +65,8 @@ SELECT
   start_time,
   reservation_name,
   autoscale_current_slots,
+  autoscale_max_slots,
+  autoscale_current_slots / autoscale_max_slots AS autoscale_usage_ratio,
   autoscaled_cost,
   slots_assigned, slots_assigned_cost,
   slots_max_assigned,
@@ -73,4 +75,4 @@ SELECT
 FROM
   slot_data
 ORDER BY
-  start_time DESC
\ No newline at end of file
+  start_time DESC

From dea15c2b09974156a46becc8182fe519dcf2af9c Mon Sep 17 00:00:00 2001
From: Sayle Matthews
Date: Mon, 22 Sep 2025 13:58:25 -0500
Subject: [PATCH 41/52] Added a query that will show the queries running and
 the reservation usage per second over a time interval.

---
 ...eries_and_reservation_usage_per_second.sql | 99 +++++++++++++++++++
 1 file changed, 99 insertions(+)
 create mode 100644 information_schema/queries_and_reservation_usage_per_second.sql

diff --git a/information_schema/queries_and_reservation_usage_per_second.sql b/information_schema/queries_and_reservation_usage_per_second.sql
new file mode 100644
index 0000000..2fccaf2
--- /dev/null
+++ b/information_schema/queries_and_reservation_usage_per_second.sql
@@ -0,0 +1,99 @@
+/*
+ * This query retrieves a detailed, per-second timeline of how slots were utilized and assigned, helping you monitor and understand your BigQuery reservation performance.
+ * It is useful for observing the autoscaler behavior on a per-reservation basis.
+
+ * Instructions for use:
+ * 1. Modify the project and region placeholders below to match your required values.
+ * 2. Change the interval_in_days value to travel back further in time. By default this is 7 days.
+ *
+ * Note this query doesn't include costs and only shows the jobs running to keep it simple and focused on jobs running during the periods.
+ * Costs can be added to slot_data by pulling lines from this other query:
+ * https://github.com/doitintl/bigquery-optimization-queries/blob/main/information_schema/autoscale_usage_and_costs_per_second.sql
+ */
+
+-- Modify this to go further back in time
+DECLARE interval_in_days INT64 DEFAULT 7;
+
+WITH slot_data AS
+(
+  SELECT
+    -- Start time (aggregated per-second)
+    details.start_time,
+    reservation_name,
+    edition,
+    -- Number of slots added to the reservation by autoscaling at this second.
+    details.autoscale_current_slots,
+    -- Maximum number of slots that could be added to the reservation by autoscaling at this second.
+    details.autoscale_max_slots,
+    -- Number of slots assigned to this reservation at this second. It equals the baseline slot capacity of a reservation.
+    details.slots_assigned,
+    -- Maximum slot capacity for this reservation, including slot sharing at this second.
+    -- If ignore_idle_slots is true, this field is the same as slots_assigned.
+    -- Otherwise, the slots_max_assigned field is the total number of slots in all capacity commitments in the administration project.
+    details.slots_max_assigned
+  FROM
+    -- Schema reference is here: https://cloud.google.com/bigquery/docs/information-schema-reservation-timeline#schema
+    -- Change the project and region placeholders below to match your required values
+    ``.``.INFORMATION_SCHEMA.RESERVATIONS_TIMELINE,
+    -- The per_second_details is an array of structs, so needs to be unnested and joined versus rest of the data
+    -- The full row is per minute, and the details are per second within that minute, thus the need for an array in this column
+    UNNEST(per_second_details) AS details
+  WHERE
+    period_start BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY)
+      AND CURRENT_TIMESTAMP()
+),
+job_data AS
+(
+  SELECT
+    period_start,
+    -- Extract the reservation name from the fully-qualified reservation ID returned by this view
+    SPLIT(reservation_id, '.')[OFFSET(ARRAY_LENGTH(SPLIT(reservation_id, '.')) - 1)] AS reservation_name,
+    -- Create an array of structs showing the jobs that were running during this period
+    ARRAY_AGG(
+      STRUCT(
+        period_slot_ms,
+        period_slot_ms/1000 AS period_slot_s,
+        project_id,
+        user_email,
+        job_id,
+        statement_type,
+        job_start_time,
+        state,
+        total_bytes_billed
+      )
+    ) AS jobs
+  FROM
+    ``.``.INFORMATION_SCHEMA.JOBS_TIMELINE_BY_ORGANIZATION
+  WHERE
+    period_start BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY)
+      AND CURRENT_TIMESTAMP()
+    -- Note that this only pulls queries and excludes SCRIPT job types
+    -- Change this if needed for other job types
+    AND job_type = 'QUERY'
+    AND (statement_type != 'SCRIPT' OR statement_type IS NULL)
+  GROUP BY
+    period_start, reservation_name
+),
+combined_data AS
+(
+  SELECT
+    start_time,
+    slot_data.reservation_name,
+    edition,
+    autoscale_current_slots,
+    autoscale_max_slots,
+    slots_assigned,
+    slots_max_assigned,
+    jobs
+  FROM
+    slot_data JOIN job_data
+      ON slot_data.start_time = job_data.period_start
+        AND slot_data.reservation_name = job_data.reservation_name
+)

+SELECT
+  *
+FROM
+  combined_data
+ORDER BY
+  combined_data.start_time DESC
\ No newline at end of file

From 0bc7464379dd10b9bb39fba5883e770ce181de31 Mon Sep 17 00:00:00 2001
From: Sayle Matthews
Date: Mon, 22 Sep 2025 14:01:57 -0500
Subject: [PATCH 42/52] Added some clarifications about the data retention and
 the jobs field.

---
 .../queries_and_reservation_usage_per_second.sql | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/information_schema/queries_and_reservation_usage_per_second.sql b/information_schema/queries_and_reservation_usage_per_second.sql
index 2fccaf2..31ad9c0 100644
--- a/information_schema/queries_and_reservation_usage_per_second.sql
+++ b/information_schema/queries_and_reservation_usage_per_second.sql
@@ -6,6 +6,10 @@
  * 1. Modify the project and region placeholders below to match your required values.
  * 2. Change the interval_in_days value to travel back further in time. By default this is 7 days.
  *
+ * Important Notes:
+ * The INFORMATION_SCHEMA.JOBS_TIMELINE_BY_ORGANIZATION view only retains data for 7 days, so you will only be able to go back that far in time.
+ * The jobs column is a repeated field (an array of structs) showing the jobs that were running during that second for that reservation.
+ *
 * Note this query doesn't include costs and only shows the jobs running to keep it simple and focused on jobs running during the periods.
 * Costs can be added to slot_data by pulling lines from this other query:
 * https://github.com/doitintl/bigquery-optimization-queries/blob/main/information_schema/autoscale_usage_and_costs_per_second.sql

From 1ef52478e2740adf98153ec2bd1968cbb21d410d Mon Sep 17 00:00:00 2001
From: Sayle Matthews
Date: Mon, 22 Sep 2025 14:17:50 -0500
Subject: [PATCH 43/52] Added billing data to the query.

---
 ...eries_and_reservation_usage_per_second.sql | 22 +++++++++++++++----
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/information_schema/queries_and_reservation_usage_per_second.sql b/information_schema/queries_and_reservation_usage_per_second.sql
index 31ad9c0..2876924 100644
--- a/information_schema/queries_and_reservation_usage_per_second.sql
+++ b/information_schema/queries_and_reservation_usage_per_second.sql
@@ -10,14 +10,20 @@
  * The INFORMATION_SCHEMA.JOBS_TIMELINE_BY_ORGANIZATION view only retains data for 7 days, so you will only be able to go back that far in time.
  * The jobs column is a repeated field (an array of structs) showing the jobs that were running during that second for that reservation.
  *
- * Note this query doesn't include costs and only shows the jobs running to keep it simple and focused on jobs running during the periods.
- * Costs can be added to slot_data by pulling lines from this other query:
- * https://github.com/doitintl/bigquery-optimization-queries/blob/main/information_schema/autoscale_usage_and_costs_per_second.sql
+ * Note: If not using the US or EU multi-regions, then the costs may be different.
+ * Change the standard_edition_cost, enterprise_edition_cost, and enterprise_plus_edition_cost values below to match the actual cost listed here:
+ * https://cloud.google.com/bigquery/pricing?hl=en#:~:text=size%20calculation.-,Capacity%20compute%20pricing,-BigQuery%20offers%20a
  */

 -- Modify this to go further back in time
 DECLARE interval_in_days INT64 DEFAULT 7;

+-- Modify these values if not using the US or EU multi-regions
+-- Values can be found here: https://cloud.google.com/bigquery/pricing?hl=en#:~:text=size%20calculation.-,Capacity%20compute%20pricing,-BigQuery%20offers%20a
+DECLARE standard_edition_cost NUMERIC DEFAULT 0.04;
+DECLARE enterprise_edition_cost NUMERIC DEFAULT 0.06;
+DECLARE enterprise_plus_edition_cost NUMERIC DEFAULT 0.10;
+
 WITH slot_data AS
 (
   SELECT
@@ -57,13 +63,21 @@ job_data AS
         STRUCT(
           period_slot_ms,
          period_slot_ms/1000 AS period_slot_s,
+          ((CASE edition
+              WHEN 'STANDARD' THEN standard_edition_cost
+              WHEN 'ENTERPRISE' THEN enterprise_edition_cost
+              WHEN 'ENTERPRISE PLUS' THEN enterprise_plus_edition_cost
+            END
+           )/3600) * (period_slot_ms/1000) AS editions_cost,
          project_id,
          user_email,
          job_id,
          statement_type,
          job_start_time,
          state,
-          total_bytes_billed
+          -- Note that the bytes billed reported are for the entire job, not just the portion that ran during this second
+          -- This is because the jobs can run across multiple seconds, and the view does not provide this for on-demand jobs
+          total_bytes_billed AS total_bytes_billed_for_full_job
        )
      ) AS jobs
   FROM

From 6fe809c6d8287c535dc9e89ecee1b4f56933e3 Mon Sep 17 00:00:00 2001
From: Sayle Matthews
Date: Mon, 22 Sep 2025 14:20:27 -0500
Subject: [PATCH 44/52] Added an additional note about the jobs by organization
 view.
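As a purely illustrative example of that filtering (the reservation name
below is an assumed placeholder), restricting slot_data to one reservation
only needs one extra predicate in its WHERE clause:

    AND reservation_name = 'my-production-reservation'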
---
 information_schema/queries_and_reservation_usage_per_second.sql | 1 +
 1 file changed, 1 insertion(+)

diff --git a/information_schema/queries_and_reservation_usage_per_second.sql b/information_schema/queries_and_reservation_usage_per_second.sql
index 2876924..b8e7e93 100644
--- a/information_schema/queries_and_reservation_usage_per_second.sql
+++ b/information_schema/queries_and_reservation_usage_per_second.sql
@@ -7,6 +7,7 @@
  * 2. Change the interval_in_days value to travel back further in time. By default this is 7 days.
  *
  * Important Notes:
+ * This uses the INFORMATION_SCHEMA.JOBS_TIMELINE_BY_ORGANIZATION view, which might need additional permissions, or you might need to filter on specific reservations in the WHERE clause.
  * The INFORMATION_SCHEMA.JOBS_TIMELINE_BY_ORGANIZATION view only retains data for 7 days, so you will only be able to go back that far in time.
  * The jobs column is a repeated field (an array of structs) showing the jobs that were running during that second for that reservation.
  *

From bc0f0c9600728dad2fb3867799d878370a0b171c Mon Sep 17 00:00:00 2001
From: Sayle Matthews <61764166+sayle-doit@users.noreply.github.com>
Date: Thu, 25 Sep 2025 08:27:50 -0500
Subject: [PATCH 45/52] Update queries_and_reservation_usage_per_second.sql

Added in a filter on the partition field to reduce data processing of
the query.
---
 .../queries_and_reservation_usage_per_second.sql | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/information_schema/queries_and_reservation_usage_per_second.sql b/information_schema/queries_and_reservation_usage_per_second.sql
index b8e7e93..800ba3b 100644
--- a/information_schema/queries_and_reservation_usage_per_second.sql
+++ b/information_schema/queries_and_reservation_usage_per_second.sql
@@ -86,6 +86,12 @@ job_data AS
   WHERE
     period_start BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY)
       AND CURRENT_TIMESTAMP()
+    -- Use the partition column to grab jobs as well to reduce bytes processed
+    -- This might cause some jobs to be lost if they started before the boundary, but otherwise
+    -- this query might process too much data for many users.
+    -- Note this is job_creation_time, not creation_time as the docs say
+    AND job_creation_time BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY)
+      AND CURRENT_TIMESTAMP()
     -- Note that this only pulls queries and excludes SCRIPT job types
     -- Change this if needed for other job types
     AND job_type = 'QUERY'
     AND (statement_type != 'SCRIPT' OR statement_type IS NULL)

From efe2a73944ee688c4c135e703344f16f49bfce85 Mon Sep 17 00:00:00 2001
From: Sayle Matthews <61764166+sayle-doit@users.noreply.github.com>
Date: Mon, 6 Oct 2025 13:32:55 -0500
Subject: [PATCH 46/52] Update general_job_information.sql

Added additional columns showing reservations and billing type.
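The billing-type column is a simple NULL check: jobs with no reservation_id
ran under on-demand billing, and everything else ran on an Editions
reservation. A self-contained sketch of that logic with made-up sample
values:

    SELECT reservation_id,
           IF(reservation_id IS NULL, 'On-Demand', 'Editions') AS queryBillingType
    FROM UNNEST([CAST(NULL AS STRING), 'my-project:US.prod-reservation']) AS reservation_id;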
---
 information_schema/general_job_information.sql | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/information_schema/general_job_information.sql b/information_schema/general_job_information.sql
index 94927c6..4453464 100644
--- a/information_schema/general_job_information.sql
+++ b/information_schema/general_job_information.sql
@@ -8,6 +8,8 @@ BEGIN
   SELECT
     user_email AS user,
     job_type AS jobType,
+    reservation_id AS reservationId,
+    IF(reservation_id IS NULL, 'On-Demand', 'Editions') AS queryBillingType,
     query,
     job_id AS jobId,
     project_id AS projectId,

From 67b88ed38dedb26942824002f7ca2cdea2209ac2 Mon Sep 17 00:00:00 2001
From: Sayle Matthews
Date: Tue, 7 Oct 2025 13:44:07 -0500
Subject: [PATCH 47/52] Added a new SQL script to track table usage and costs
 per day.

---
 .../table_usage_and_costs_per_day.sql         | 57 +++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 information_schema/table_usage_and_costs_per_day.sql

diff --git a/information_schema/table_usage_and_costs_per_day.sql b/information_schema/table_usage_and_costs_per_day.sql
new file mode 100644
index 0000000..7d52246
--- /dev/null
+++ b/information_schema/table_usage_and_costs_per_day.sql
@@ -0,0 +1,57 @@
+-- This query shows costs and billed usage per day, per table, in your project.
+-- It may not be fully up to date, as this data typically runs 2-3 days behind in INFORMATION_SCHEMA
+
+-- Change this value to change how far in the past the query will search
+-- Note currently (as of 10/7/2025) this view only supports up to 90 days in the past
+DECLARE interval_in_days INT64 DEFAULT 7;
+
+-- Change these for different regions; these values are for the US multi-region
+-- Note these values are in gibibyte (GiB) hours
+-- These values can be found here: https://cloud.google.com/bigquery/pricing?hl=en#storage-pricing
+DECLARE active_logical_storage_per_hour NUMERIC DEFAULT 0.000027397;
+DECLARE long_term_logical_storage_per_hour NUMERIC DEFAULT 0.000013699;
+
+DECLARE active_physical_storage_per_hour NUMERIC DEFAULT 0.000054795;
+DECLARE long_term_physical_storage_per_hour NUMERIC DEFAULT 0.000027397;
+
+SELECT
+  usage_date,
+  project_id,
+  table_schema,
+  table_name,
+  SUM(billable_total_logical_usage)/POW(1024, 3) AS billable_total_logical_usage_in_gibibytes,
+  SUM(billable_total_logical_usage)/POW(1024, 4) AS billable_total_logical_usage_in_tebibytes,
+
+  SUM(billable_active_logical_usage)/POW(1024, 3) AS billable_active_logical_usage_in_gibibytes,
+  SUM(billable_active_logical_usage)/POW(1024, 4) AS billable_active_logical_usage_in_tebibytes,
+  SUM(billable_active_logical_usage)/POW(1024, 3) * active_logical_storage_per_hour AS active_logical_cost,
+  CONCAT('$ ',FORMAT("%'.2f", SUM(billable_active_logical_usage)/POW(1024, 3) * active_logical_storage_per_hour)) AS active_logical_pretty_cost,
+
+  SUM(billable_long_term_logical_usage)/POW(1024, 3) AS billable_long_term_logical_usage_in_gibibytes,
+  SUM(billable_long_term_logical_usage)/POW(1024, 4) AS billable_long_term_logical_usage_in_tebibytes,
+  SUM(billable_long_term_logical_usage)/POW(1024, 3) * long_term_logical_storage_per_hour AS long_term_logical_cost,
+  CONCAT('$ ',FORMAT("%'.2f", SUM(billable_long_term_logical_usage)/POW(1024, 3) * long_term_logical_storage_per_hour)) AS long_term_logical_pretty_cost,
+
+  SUM(billable_total_physical_usage)/POW(1024, 3) AS billable_total_physical_usage_in_gibibytes,
+  SUM(billable_total_physical_usage)/POW(1024, 4) AS billable_total_physical_usage_in_tebibytes,
+
+  SUM(billable_active_physical_usage)/POW(1024, 3) AS
billable_active_physical_usage_in_gibibytes,
+  SUM(billable_active_physical_usage)/POW(1024, 4) AS billable_active_physical_usage_in_tebibytes,
+  SUM(billable_active_physical_usage)/POW(1024, 3) * active_physical_storage_per_hour AS active_physical_cost,
+  CONCAT('$ ',FORMAT("%'.2f", SUM(billable_active_physical_usage)/POW(1024, 3) * active_physical_storage_per_hour)) AS active_physical_pretty_cost,
+
+  SUM(billable_long_term_physical_usage)/POW(1024, 3) AS billable_long_term_physical_usage_in_gibibytes,
+  SUM(billable_long_term_physical_usage)/POW(1024, 4) AS billable_long_term_physical_usage_in_tebibytes,
+  SUM(billable_long_term_physical_usage)/POW(1024, 3) * long_term_physical_storage_per_hour AS long_term_physical_cost,
+  CONCAT('$ ',FORMAT("%'.2f", SUM(billable_long_term_physical_usage)/POW(1024, 3) * long_term_physical_storage_per_hour)) AS long_term_physical_pretty_cost
+FROM
+  ``.``.INFORMATION_SCHEMA.TABLE_STORAGE_USAGE_TIMELINE
+WHERE
+  usage_date >= DATE_SUB(CURRENT_DATE(), INTERVAL interval_in_days DAY)
+GROUP BY
+  1,
+  2,
+  3,
+  4
+ORDER BY
+  usage_date;
\ No newline at end of file

From d169738aa59b1535f30d6938006c144afb93ff16 Mon Sep 17 00:00:00 2001
From: Sayle Matthews
Date: Wed, 15 Oct 2025 16:38:56 -0500
Subject: [PATCH 48/52] Added a new query that will find tables referenced by
 jobs over an interval. It then calculates the price of those tables and
 displays the total cost and when it was last referenced.

---
 ...table_storage_by_most_recently_queried.sql | 202 ++++++++++++++++++
 1 file changed, 202 insertions(+)
 create mode 100644 information_schema/table_storage_by_most_recently_queried.sql

diff --git a/information_schema/table_storage_by_most_recently_queried.sql b/information_schema/table_storage_by_most_recently_queried.sql
new file mode 100644
index 0000000..a898dbd
--- /dev/null
+++ b/information_schema/table_storage_by_most_recently_queried.sql
@@ -0,0 +1,202 @@
+/*
+ * This query retrieves the table(s) each job references and joins them to each table's storage size, along with the last time each table was queried during the range.
+ * It then calculates the price for that table and displays it along with the storage data.
+
+ * Instructions for use:
+ * 1. Modify the project and region placeholders below to match your required values.
+ * 2. Change the interval_in_days value to travel back further in time. By default this is 14 days.
+
+ * Note: If not using the US or EU multi-regions, then the storage costs may be different.
+ * Change the storage price values below to match the actual cost listed here:
+ * https://cloud.google.com/bigquery/pricing?hl=en#storage-pricing
+ */
+
+-- Change this value to change how far in the past the query will search
+DECLARE interval_in_days INT64 DEFAULT 14;
+
+-- Update these with the correct values for your region (default is for US multi-region)
+-- EU multi-region values are below.
+-- This is for the price per GiB monthly (not hourly)
+-- Can be found here: https://cloud.google.com/bigquery/pricing?hl=en#storage-pricing
+DECLARE active_logical_price NUMERIC DEFAULT 0.02;
+DECLARE long_term_logical_price NUMERIC DEFAULT 0.01;
+DECLARE active_physical_price NUMERIC DEFAULT 0.04;
+DECLARE long_term_physical_price NUMERIC DEFAULT 0.02;
+
+-- These values are for the EU multi-region
+-- Uncomment these and comment out the values above if using the EU multi-region
+/*
+DECLARE active_logical_price NUMERIC DEFAULT 0.02;
+DECLARE long_term_logical_price NUMERIC DEFAULT 0.01;
+DECLARE active_physical_price NUMERIC DEFAULT 0.044;
+DECLARE long_term_physical_price NUMERIC DEFAULT 0.022;
+*/
+
+WITH tables AS (
+    -- Basic table data and last query usage
+    SELECT
+        -- Create the fully qualified table name for joining
+        t_ref.project_id,
+        t_ref.dataset_id,
+        t_ref.table_id,
+
+        -- Using creation time as the last queried time since this is the partitioning column in the JOBS view
+        MAX(j.creation_time) AS last_queried_time
+    FROM
+        ``.``.INFORMATION_SCHEMA.JOBS_BY_PROJECT AS j,
+        -- Extract the referenced tables
+        UNNEST(j.referenced_tables) AS t_ref
+    WHERE
+        -- Only grab successful query jobs
+        j.job_type = 'QUERY'
+        AND j.state = 'DONE'
+
+        -- Filter on the given range
+        AND creation_time BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY)
+            AND CURRENT_TIMESTAMP()
+    GROUP BY 1, 2, 3
+),
+table_billing_type AS (
+    -- Table billing type
+    SELECT
+        ts.project_id,
+        ts.table_schema AS dataset_id,
+        ts.table_name AS table_id,
+
+        -- Get the billing model option. Defaults to LOGICAL if the option is not explicitly set.
+        IFNULL(topt.option_value, 'LOGICAL') AS storage_billing_model,
+
+        -- Conditionally determine the Total Billed Bytes
+        CASE
+            -- Use PHYSICAL bytes if the option is explicitly set to PHYSICAL
+            WHEN topt.option_value = 'PHYSICAL' THEN ts.total_physical_bytes
+            -- Otherwise (LOGICAL or NULL/default), use LOGICAL bytes
+            ELSE ts.total_logical_bytes
+        END AS total_billed_bytes,
+
+        -- Conditionally determine the Active Billed Bytes
+        CASE
+            WHEN topt.option_value = 'PHYSICAL' THEN ts.active_physical_bytes
+            ELSE ts.active_logical_bytes
+        END AS active_billed_bytes,
+
+        -- Conditionally determine the Long-Term Billed Bytes
+        CASE
+            WHEN topt.option_value = 'PHYSICAL' THEN ts.long_term_physical_bytes
+            ELSE ts.long_term_logical_bytes
+        END AS long_term_billed_bytes
+
+    FROM
+        ``.``.INFORMATION_SCHEMA.TABLE_STORAGE AS ts
+    INNER JOIN
+        -- Join with TABLES view to get the original creation time
+        ``.``.INFORMATION_SCHEMA.TABLES AS t
+        ON ts.project_id = t.table_catalog
+            AND ts.table_schema = t.table_schema
+            AND ts.table_name = t.table_name
+    LEFT JOIN
+        -- Left join TABLE_OPTIONS to find the storage billing model (it might be NULL if the default is used or it was converted in July 2023 before this option was added)
+        ``.``.INFORMATION_SCHEMA.TABLE_OPTIONS AS topt
+        ON ts.project_id = topt.table_catalog
+            AND ts.table_schema = topt.table_schema
+            AND ts.table_name = topt.table_name
+            AND topt.option_name = 'storage_billing_model'
+)
+
+SELECT
+    m.project_id,
+    m.dataset_id,
+    m.table_id,
+    -- Build a nice message instead of throwing a NULL value in for last queried time
+    IF(u.last_queried_time IS NULL, CONCAT('Prior to ', STRING(TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY))), STRING(u.last_queried_time)) AS last_queried_time,
+    m.storage_billing_model,
+
+    -- Total storage price
+    CONCAT('$ ',
+        FORMAT("%'.2f",
+            -- Adds active + long-term price (accounting for physical or logical)
+            -- Calculate active price
+            CASE
+                WHEN m.storage_billing_model = 'LOGICAL' THEN
+                    (m.active_billed_bytes/POW(1024, 3)) * active_logical_price
+                ELSE
+                    (m.active_billed_bytes/POW(1024, 3)) * active_physical_price
+            END
+            +
+            -- Calculate long-term price
+            CASE
+                WHEN m.storage_billing_model = 'LOGICAL' THEN
+                    (m.long_term_billed_bytes/POW(1024, 3)) * long_term_logical_price
+                ELSE
+                    (m.long_term_billed_bytes/POW(1024, 3)) * long_term_physical_price
+            END
+        )
+    ) AS table_storage_price,
+
+    -- Total Billed Size
+    FORMAT('%.2f %s',
+        m.total_billed_bytes / POW(1024,
+            CASE
+                WHEN m.total_billed_bytes >= POW(1024, 4) THEN 4 -- TiB
+                WHEN m.total_billed_bytes >= POW(1024, 3) THEN 3 -- GiB
+                WHEN m.total_billed_bytes >= POW(1024, 2) THEN 2 -- MiB
+                ELSE 1 -- KiB
+            END
+        ),
+        CASE
+            WHEN m.total_billed_bytes >= POW(1024, 4) THEN 'TiB'
+            WHEN m.total_billed_bytes >= POW(1024, 3) THEN 'GiB'
+            WHEN m.total_billed_bytes >= POW(1024, 2) THEN 'MiB'
+            ELSE 'KiB'
+        END
+    ) AS readable_total_billed_size,
+    m.total_billed_bytes,
+
+    -- Active Size
+    FORMAT('%.2f %s',
+        m.active_billed_bytes / POW(1024,
+            CASE
+                WHEN m.active_billed_bytes >= POW(1024, 4) THEN 4
+                WHEN m.active_billed_bytes >= POW(1024, 3) THEN 3
+                WHEN m.active_billed_bytes >= POW(1024, 2) THEN 2
+                ELSE 1
+            END
+        ),
+        CASE
+            WHEN m.active_billed_bytes >= POW(1024, 4) THEN 'TiB'
+            WHEN m.active_billed_bytes >= POW(1024, 3) THEN 'GiB'
+            WHEN m.active_billed_bytes >= POW(1024, 2) THEN 'MiB'
+            ELSE 'KiB'
+        END
+    ) AS readable_active_size,
+    m.active_billed_bytes,
+
+    -- Long-Term Size
+    FORMAT('%.2f %s',
+        m.long_term_billed_bytes / POW(1024,
+            CASE
+                WHEN m.long_term_billed_bytes >= POW(1024, 4) THEN 4
+                WHEN m.long_term_billed_bytes >= POW(1024, 3) THEN 3
+                WHEN m.long_term_billed_bytes >= POW(1024, 2) THEN 2
+                ELSE 1
+            END
+        ),
+        CASE
+            WHEN m.long_term_billed_bytes >= POW(1024, 4) THEN 'TiB'
+            WHEN m.long_term_billed_bytes >= POW(1024, 3) THEN 'GiB'
+            WHEN m.long_term_billed_bytes >= POW(1024, 2) THEN 'MiB'
+            ELSE 'KiB'
+        END
+    ) AS readable_long_term_size
+FROM
+    table_billing_type AS m
+LEFT JOIN
+    tables AS u
+    ON m.project_id = u.project_id
+        AND m.dataset_id = u.dataset_id
+        AND m.table_id = u.table_id
+ORDER BY
+    -- Sort by last queried time (oldest first, with NULLs first)
+    u.last_queried_time ASC NULLS FIRST,
+    -- Secondary sort by the determined billed size descending
+    total_billed_bytes DESC;
\ No newline at end of file

From 9eb51d4a51aaa8d26a061297bd5224ed6d348b00 Mon Sep 17 00:00:00 2001
From: Sayle Matthews <61764166+sayle-doit@users.noreply.github.com>
Date: Fri, 31 Oct 2025 10:15:03 -0500
Subject: [PATCH 49/52] Clarify table type filtering in SQL query

Fixed a bug where all table types were included. It was discovered that
Google only bills on the "BASE TABLE" table type; thus, it is filtered on
that.
---
 information_schema/project_storage_model_recommendation.sql | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/information_schema/project_storage_model_recommendation.sql b/information_schema/project_storage_model_recommendation.sql
index c32035f..ee4ce8e 100644
--- a/information_schema/project_storage_model_recommendation.sql
+++ b/information_schema/project_storage_model_recommendation.sql
@@ -86,6 +86,9 @@ WITH storage AS
             AND t.table_name = tb.table_name
         WHERE
             tb.deleted = false
+            -- Only look at the BASE TABLE type, as this is what Google uses in their billing data to
+            -- bill on even if there are clones, snapshots, etc.
+            AND t.table_type = 'BASE TABLE'
 ),
 schemata_options AS
 (

From 6351a0ab481c6edb3e6dca5b5797f6fb8dd9886a Mon Sep 17 00:00:00 2001
From: Sayle Matthews <61764166+sayle-doit@users.noreply.github.com>
Date: Fri, 31 Oct 2025 10:15:33 -0500
Subject: [PATCH 50/52] Update project_per_table_storage_model_recommendations.sql

Fixed a bug where all table types were included. It was discovered that
Google only bills on the "BASE TABLE" table type; thus, it is filtered on
that.
---
 .../project_per_table_storage_model_recommendations.sql | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/information_schema/project_per_table_storage_model_recommendations.sql b/information_schema/project_per_table_storage_model_recommendations.sql
index d66195d..7a9265e 100644
--- a/information_schema/project_per_table_storage_model_recommendations.sql
+++ b/information_schema/project_per_table_storage_model_recommendations.sql
@@ -110,6 +110,7 @@ WITH storage AS
             AND t.table_name = tb.table_name
         WHERE
             tb.deleted = false
+            AND t.table_type = 'BASE TABLE'
 ),
 calculations AS
 (
@@ -186,4 +187,4 @@ SELECT
     --final_data.* EXCEPT(dataset)
 FROM
     final_data
-;
\ No newline at end of file
+;

From e668bf331322d13d1adf791158b0cf60860a4dc6 Mon Sep 17 00:00:00 2001
From: Sayle Matthews <61764166+sayle-doit@users.noreply.github.com>
Date: Mon, 15 Dec 2025 09:02:12 -0600
Subject: [PATCH 51/52] Fixed job_type to statement_type

Google fixed a bug so that job_type no longer contains the 'SCRIPT' value,
which broke the query. It is now contained in statement_type only.
---
 information_schema/slots_by_day.sql | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/information_schema/slots_by_day.sql b/information_schema/slots_by_day.sql
index 46d9a36..fcfb487 100644
--- a/information_schema/slots_by_day.sql
+++ b/information_schema/slots_by_day.sql
@@ -14,7 +14,7 @@ BEGIN
         ``.``.INFORMATION_SCHEMA.JOBS_TIMELINE
     WHERE
         period_start BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY) AND CURRENT_TIMESTAMP()
-        AND job_type <> 'SCRIPT' -- Exclude scripts since they pull in child process slots and skew results
+        AND statement_type <> 'SCRIPT' -- Exclude scripts since they pull in child process slots and skew results
     GROUP BY
         period_start
     ORDER BY
@@ -43,4 +43,4 @@ FROM
     joined
 ORDER BY
     timeInterval ASC;
-END
\ No newline at end of file
+END

From 2b45f99511fb47611c4f7a5cd6a15a8b333d5703 Mon Sep 17 00:00:00 2001
From: Sayle Matthews
Date: Mon, 15 Dec 2025 09:03:41 -0600
Subject: [PATCH 52/52] Now use statement_type to check for script job types.

Google fixed a long-standing issue where job_type contained 'SCRIPT' for
all script jobs, but no longer does.
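A quick way to see the new behavior on your own data (illustrative only;
assumes the US region qualifier) is to compare the two columns directly:

    SELECT job_type, statement_type, COUNT(*) AS jobs
    FROM `region-us`.INFORMATION_SCHEMA.JOBS_BY_PROJECT
    WHERE creation_time > TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL 1 DAY)
    GROUP BY job_type, statement_type;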
Now only statement_type contains 'SCRIPT'.
---
 information_schema/slots_by_hour.sql            | 2 +-
 information_schema/slots_by_minute.sql          | 2 +-
 information_schema/slots_by_minute_and_user.sql | 2 +-
 information_schema/slots_by_second.sql          | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/information_schema/slots_by_hour.sql b/information_schema/slots_by_hour.sql
index 907606a..45f3276 100644
--- a/information_schema/slots_by_hour.sql
+++ b/information_schema/slots_by_hour.sql
@@ -14,7 +14,7 @@ BEGIN
         ``.``.INFORMATION_SCHEMA.JOBS_TIMELINE
     WHERE
         period_start BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY) AND CURRENT_TIMESTAMP()
-        AND job_type <> 'SCRIPT' -- Exclude scripts since they pull in child process slots and skew results
+        AND statement_type <> 'SCRIPT' -- Exclude scripts since they pull in child process slots and skew results
     GROUP BY
         period_start
     ORDER BY
diff --git a/information_schema/slots_by_minute.sql b/information_schema/slots_by_minute.sql
index afb34e8..604a719 100644
--- a/information_schema/slots_by_minute.sql
+++ b/information_schema/slots_by_minute.sql
@@ -14,7 +14,7 @@ BEGIN
         ``.``.INFORMATION_SCHEMA.JOBS_TIMELINE
     WHERE
         period_start BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY) AND CURRENT_TIMESTAMP()
-        AND job_type <> 'SCRIPT' -- Exclude scripts since they pull in child process slots and skew results
+        AND statement_type <> 'SCRIPT' -- Exclude scripts since they pull in child process slots and skew results
     GROUP BY
         period_start
     ORDER BY
diff --git a/information_schema/slots_by_minute_and_user.sql b/information_schema/slots_by_minute_and_user.sql
index 7722358..4cf154c 100644
--- a/information_schema/slots_by_minute_and_user.sql
+++ b/information_schema/slots_by_minute_and_user.sql
@@ -16,7 +16,7 @@ BEGIN
         ``.``.INFORMATION_SCHEMA.JOBS_TIMELINE
     WHERE
         period_start BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY) AND CURRENT_TIMESTAMP()
-        AND job_type <> 'SCRIPT' -- Exclude scripts since they pull in child process slots and skew results
+        AND statement_type <> 'SCRIPT' -- Exclude scripts since they pull in child process slots and skew results
     GROUP BY
         period_start,
         user_email
diff --git a/information_schema/slots_by_second.sql b/information_schema/slots_by_second.sql
index 3746e02..b163697 100644
--- a/information_schema/slots_by_second.sql
+++ b/information_schema/slots_by_second.sql
@@ -14,7 +14,7 @@ BEGIN
         ``.``.INFORMATION_SCHEMA.JOBS_TIMELINE
     WHERE
         period_start BETWEEN TIMESTAMP_SUB(CURRENT_TIMESTAMP(), INTERVAL interval_in_days DAY) AND CURRENT_TIMESTAMP()
-        AND job_type <> 'SCRIPT' -- Exclude scripts since they pull in child process slots and skew results
+        AND statement_type <> 'SCRIPT' -- Exclude scripts since they pull in child process slots and skew results
     GROUP BY
         period_start
    ORDER BY