Skip to content

Commit 4f824d2

Browse files
committed
Custom queries for prometheus
- Tests for prometheus queries - Fix rubocop warnings - Remove unused method. Add more queries to deployment queries. - Wrap BaseQuery in module hierarchy - Rename Prometheus class to PrometheusClient
1 parent e7b53dd commit 4f824d2

File tree

13 files changed

+146
-53
lines changed

13 files changed

+146
-53
lines changed

app/controllers/projects/deployments_controller.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ def index
1111
end
1212

1313
def metrics
14-
@metrics = deployment.metrics(1.hour)
14+
@metrics = deployment.metrics
1515

1616
if @metrics&.any?
1717
render json: @metrics, status: :ok

app/models/deployment.rb

+2-6
Original file line numberDiff line numberDiff line change
@@ -103,14 +103,10 @@ def has_metrics?
103103
project.monitoring_service.present?
104104
end
105105

106-
def metrics(timeframe)
106+
def metrics
107107
return {} unless has_metrics?
108108

109-
half_timeframe = timeframe / 2
110-
timeframe_start = created_at - half_timeframe
111-
timeframe_end = created_at + half_timeframe
112-
113-
metrics = project.monitoring_service.metrics(environment, timeframe_start: timeframe_start, timeframe_end: timeframe_end)
109+
metrics = project.monitoring_service.deployment_metrics(self)
114110
metrics&.merge(deployment_time: created_at.to_i) || {}
115111
end
116112

app/models/environment.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ def has_metrics?
150150
end
151151

152152
def metrics
153-
project.monitoring_service.metrics(self) if has_metrics?
153+
project.monitoring_service.environment_metrics(self) if has_metrics?
154154
end
155155

156156
# An environment name is not necessarily suitable for use in URLs, DNS

app/models/project_services/monitoring_service.rb

+5-2
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,11 @@ def self.supported_events
99
%w()
1010
end
1111

12-
# Environments have a number of metrics
13-
def metrics(environment, timeframe_start: nil, timeframe_end: nil)
12+
def environment_metrics(environment)
13+
raise NotImplementedError
14+
end
15+
16+
def deployment_metrics(deployment)
1417
raise NotImplementedError
1518
end
1619
end

app/models/project_services/prometheus_service.rb

+10-25
Original file line numberDiff line numberDiff line change
@@ -63,45 +63,30 @@ def test(*args)
6363
{ success: false, result: err }
6464
end
6565

66-
def metrics(environment, timeframe_start: nil, timeframe_end: nil)
67-
with_reactive_cache(environment.slug, timeframe_start, timeframe_end) do |data|
68-
data
69-
end
66+
def environment_metrics(environment, **args)
67+
with_reactive_cache(Gitlab::Prometheus::Queries::EnvironmentQuery.name, environment.id, &:itself)
68+
end
69+
70+
def deployment_metrics(deployment)
71+
with_reactive_cache(Gitlab::Prometheus::Queries::DeploymentQuery.name, deployment.id, &:itself)
7072
end
7173

7274
# Cache metrics for specific environment
73-
def calculate_reactive_cache(environment_slug, timeframe_start, timeframe_end)
75+
def calculate_reactive_cache(query_class_name, *args)
7476
return unless active? && project && !project.pending_delete?
7577

76-
timeframe_start = Time.parse(timeframe_start) if timeframe_start
77-
timeframe_end = Time.parse(timeframe_end) if timeframe_end
78-
79-
timeframe_start ||= 8.hours.ago
80-
timeframe_end ||= Time.now
81-
82-
memory_query = %{(sum(container_memory_usage_bytes{container_name!="POD",environment="#{environment_slug}"}) / count(container_memory_usage_bytes{container_name!="POD",environment="#{environment_slug}"})) /1024/1024}
83-
cpu_query = %{sum(rate(container_cpu_usage_seconds_total{container_name!="POD",environment="#{environment_slug}"}[2m])) / count(container_cpu_usage_seconds_total{container_name!="POD",environment="#{environment_slug}"}) * 100}
78+
metrics = Kernel.const_get(query_class_name).new(client).query(*args)
8479

8580
{
8681
success: true,
87-
metrics: {
88-
# Average Memory used in MB
89-
memory_values: client.query_range(memory_query, start: timeframe_start, stop: timeframe_end),
90-
memory_current: client.query(memory_query, time: timeframe_end),
91-
memory_previous: client.query(memory_query, time: timeframe_start),
92-
# Average CPU Utilization
93-
cpu_values: client.query_range(cpu_query, start: timeframe_start, stop: timeframe_end),
94-
cpu_current: client.query(cpu_query, time: timeframe_end),
95-
cpu_previous: client.query(cpu_query, time: timeframe_start)
96-
},
82+
metrics: metrics,
9783
last_update: Time.now.utc
9884
}
99-
10085
rescue Gitlab::PrometheusError => err
10186
{ success: false, result: err.message }
10287
end
10388

10489
def client
105-
@prometheus ||= Gitlab::Prometheus.new(api_url: api_url)
90+
@prometheus ||= Gitlab::PrometheusClient.new(api_url: api_url)
10691
end
10792
end
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
module Gitlab
  module Prometheus
    module Queries
      # Shared base for Prometheus query objects.
      #
      # Holds the Prometheus client and the PromQL builders that concrete
      # queries reuse. Subclasses implement #query.
      class BaseQuery
        attr_accessor :client

        # Exposes the client's calls as #client_query_range and #client_query.
        delegate :query_range, :query, to: :client, prefix: true

        # client - object responding to #query and #query_range.
        def initialize(client)
          @client = client
        end

        # Runs the query. Must be overridden by subclasses.
        def query(*args)
          raise NotImplementedError
        end

        # PromQL for the average container memory usage of an environment,
        # scaled from bytes by 2^20.
        def raw_memory_usage_query(environment_slug)
          selector = %{container_name!="POD",environment="#{environment_slug}"}

          "avg(container_memory_usage_bytes{#{selector}}) / 2^20"
        end

        # PromQL for the average container CPU usage rate (2 minute window)
        # of an environment, multiplied by 100.
        def raw_cpu_usage_query(environment_slug)
          selector = %{container_name!="POD",environment="#{environment_slug}"}

          "avg(rate(container_cpu_usage_seconds_total{#{selector}}[2m])) * 100"
        end
      end
    end
  end
end
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
module Gitlab::Prometheus::Queries
  # Collects memory and CPU metrics for the hour surrounding a deployment
  # (30 minutes before and after its creation time).
  class DeploymentQuery < BaseQuery
    # deployment_id - id of the Deployment to inspect.
    #
    # Returns a Hash with the range values for the whole window plus the
    # 30 minute averages evaluated at the deployment time (*_before) and at
    # the end of the window (*_after). Returns nil when the deployment no
    # longer exists.
    def query(deployment_id)
      deployment = Deployment.find_by(id: deployment_id)
      # The record may have been removed since the id was handed to us;
      # bail out instead of raising NoMethodError on nil.
      return unless deployment

      environment_slug = deployment.environment.slug

      memory_query = raw_memory_usage_query(environment_slug)
      memory_avg_query = %{avg(avg_over_time(container_memory_usage_bytes{container_name!="POD",environment="#{environment_slug}"}[30m]))}
      cpu_query = raw_cpu_usage_query(environment_slug)
      cpu_avg_query = %{avg(rate(container_cpu_usage_seconds_total{container_name!="POD",environment="#{environment_slug}"}[30m])) * 100}

      deployed_at = deployment.created_at.to_f
      timeframe_start = (deployment.created_at - 30.minutes).to_f
      timeframe_end = (deployment.created_at + 30.minutes).to_f

      {
        memory_values: client_query_range(memory_query, start: timeframe_start, stop: timeframe_end),
        memory_before: client_query(memory_avg_query, time: deployed_at),
        memory_after: client_query(memory_avg_query, time: timeframe_end),

        cpu_values: client_query_range(cpu_query, start: timeframe_start, stop: timeframe_end),
        cpu_before: client_query(cpu_avg_query, time: deployed_at),
        cpu_after: client_query(cpu_avg_query, time: timeframe_end)
      }
    end
  end
end
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
module Gitlab::Prometheus::Queries
  # Collects memory and CPU metrics of an environment for the last 8 hours.
  class EnvironmentQuery < BaseQuery
    # environment_id - id of the Environment to inspect.
    #
    # Returns a Hash with the range values for the last 8 hours and the
    # current value of each metric. Returns nil when the environment no
    # longer exists.
    def query(environment_id)
      environment = Environment.find_by(id: environment_id)
      # The record may have been removed since the id was handed to us;
      # bail out instead of raising NoMethodError on nil.
      return unless environment

      environment_slug = environment.slug
      timeframe_start = 8.hours.ago.to_f
      timeframe_end = Time.now.to_f

      memory_query = raw_memory_usage_query(environment_slug)
      cpu_query = raw_cpu_usage_query(environment_slug)

      {
        memory_values: client_query_range(memory_query, start: timeframe_start, stop: timeframe_end),
        memory_current: client_query(memory_query, time: timeframe_end),
        cpu_values: client_query_range(cpu_query, start: timeframe_start, stop: timeframe_end),
        cpu_current: client_query(cpu_query, time: timeframe_end)
      }
    end
  end
end

lib/gitlab/prometheus.rb renamed to lib/gitlab/prometheus_client.rb

+2-2
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ module Gitlab
22
PrometheusError = Class.new(StandardError)
33

44
# Helper methods to interact with Prometheus network services & resources
5-
class Prometheus
5+
class PrometheusClient
66
attr_reader :api_url
77

88
def initialize(api_url:)
@@ -15,7 +15,7 @@ def ping
1515

1616
def query(query, time: Time.now)
1717
get_result('vector') do
18-
json_api_get('query', query: query, time: time.utc.to_f)
18+
json_api_get('query', query: query, time: time.to_f)
1919
end
2020
end
2121

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
require 'spec_helper'

describe Gitlab::Prometheus::Queries::DeploymentQuery, lib: true do
  let(:environment) { create(:environment) }
  let(:deployment) { create(:deployment, environment: environment) }
  let(:client) { double('prometheus_client') }

  subject { described_class.new(client) }

  # Verifies the exact PromQL strings and timestamps sent to the client for
  # the 30 minute windows on either side of the deployment.
  it 'sends appropriate queries to prometheus' do
    deployed_at = deployment.created_at.to_f
    window_start = (deployment.created_at - 30.minutes).to_f
    window_stop = (deployment.created_at + 30.minutes).to_f

    memory_range_query = 'avg(container_memory_usage_bytes{container_name!="POD",environment="environment1"}) / 2^20'
    memory_avg_query = 'avg(avg_over_time(container_memory_usage_bytes{container_name!="POD",environment="environment1"}[30m]))'
    cpu_range_query = 'avg(rate(container_cpu_usage_seconds_total{container_name!="POD",environment="environment1"}[2m])) * 100'
    cpu_avg_query = 'avg(rate(container_cpu_usage_seconds_total{container_name!="POD",environment="environment1"}[30m])) * 100'

    expect(client).to receive(:query_range).with(memory_range_query, start: window_start, stop: window_stop)
    expect(client).to receive(:query).with(memory_avg_query, time: deployed_at)
    expect(client).to receive(:query).with(memory_avg_query, time: window_stop)

    expect(client).to receive(:query_range).with(cpu_range_query, start: window_start, stop: window_stop)
    expect(client).to receive(:query).with(cpu_avg_query, time: deployed_at)
    expect(client).to receive(:query).with(cpu_avg_query, time: window_stop)

    # The double returns nil for every stubbed call.
    expected_result = {
      memory_values: nil, memory_before: nil, memory_after: nil,
      cpu_values: nil, cpu_before: nil, cpu_after: nil
    }

    expect(subject.query(deployment.id)).to eq(expected_result)
  end
end

spec/lib/gitlab/prometheus_spec.rb renamed to spec/lib/gitlab/prometheus_client_spec.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
require 'spec_helper'
22

3-
describe Gitlab::Prometheus, lib: true do
3+
describe Gitlab::PrometheusClient, lib: true do
44
include PrometheusHelpers
55

66
subject { described_class.new(api_url: 'https://prometheus.example.com') }

spec/models/project_services/prometheus_service_spec.rb

+19-11
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
let(:project) { create(:prometheus_project) }
88
let(:service) { project.prometheus_service }
9+
let(:environment_query) { Gitlab::Prometheus::Queries::EnvironmentQuery }
910

1011
describe "Associations" do
1112
it { is_expected.to belong_to :project }
@@ -45,32 +46,39 @@
4546
end
4647
end
4748

48-
describe '#metrics' do
49+
describe '#environment_metrics' do
4950
let(:environment) { build_stubbed(:environment, slug: 'env-slug') }
5051

5152
around do |example|
5253
Timecop.freeze { example.run }
5354
end
5455

55-
context 'with valid data without time range' do
56-
subject { service.metrics(environment) }
56+
context 'with valid data' do
57+
subject { service.environment_metrics(environment) }
5758

5859
before do
59-
stub_reactive_cache(service, prometheus_data, 'env-slug', nil, nil)
60+
stub_reactive_cache(service, prometheus_data, environment_query, environment.id)
6061
end
6162

6263
it 'returns reactive data' do
6364
is_expected.to eq(prometheus_data)
6465
end
6566
end
67+
end
6668

67-
context 'with valid data with time range' do
68-
let(:t_start) { 1.hour.ago.utc }
69-
let(:t_end) { Time.now.utc }
70-
subject { service.metrics(environment, timeframe_start: t_start, timeframe_end: t_end) }
69+
describe '#deployment_metrics' do
70+
let(:deployment) { build_stubbed(:deployment)}
71+
let(:deployment_query) { Gitlab::Prometheus::Queries::DeploymentQuery }
72+
73+
around do |example|
74+
Timecop.freeze { example.run }
75+
end
76+
77+
context 'with valid data' do
78+
subject { service.deployment_metrics(deployment) }
7179

7280
before do
73-
stub_reactive_cache(service, prometheus_data, 'env-slug', t_start, t_end)
81+
stub_reactive_cache(service, prometheus_data, deployment_query, deployment.id)
7482
end
7583

7684
it 'returns reactive data' do
@@ -80,14 +88,14 @@
8088
end
8189

8290
describe '#calculate_reactive_cache' do
83-
let(:environment) { build_stubbed(:environment, slug: 'env-slug') }
91+
let(:environment) { create(:environment, slug: 'env-slug') }
8492

8593
around do |example|
8694
Timecop.freeze { example.run }
8795
end
8896

8997
subject do
90-
service.calculate_reactive_cache(environment.slug, nil, nil)
98+
service.calculate_reactive_cache(environment_query.to_s, environment.id)
9199
end
92100

93101
context 'when service is inactive' do

spec/support/prometheus_helpers.rb

+2-4
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
module PrometheusHelpers
22
def prometheus_memory_query(environment_slug)
3-
%{(sum(container_memory_usage_bytes{container_name!="POD",environment="#{environment_slug}"}) / count(container_memory_usage_bytes{container_name!="POD",environment="#{environment_slug}"})) /1024/1024}
3+
%{avg(container_memory_usage_bytes{container_name!="POD",environment="#{environment_slug}"}) / 2^20}
44
end
55

66
def prometheus_cpu_query(environment_slug)
7-
%{sum(rate(container_cpu_usage_seconds_total{container_name!="POD",environment="#{environment_slug}"}[2m])) / count(container_cpu_usage_seconds_total{container_name!="POD",environment="#{environment_slug}"}) * 100}
7+
%{avg(rate(container_cpu_usage_seconds_total{container_name!="POD",environment="#{environment_slug}"}[2m])) * 100}
88
end
99

1010
def prometheus_ping_url(prometheus_query)
@@ -88,10 +88,8 @@ def prometheus_data(last_update: Time.now.utc)
8888
metrics: {
8989
memory_values: prometheus_values_body('matrix').dig(:data, :result),
9090
memory_current: prometheus_value_body('vector').dig(:data, :result),
91-
memory_previous: prometheus_value_body('vector').dig(:data, :result),
9291
cpu_values: prometheus_values_body('matrix').dig(:data, :result),
9392
cpu_current: prometheus_value_body('vector').dig(:data, :result),
94-
cpu_previous: prometheus_value_body('vector').dig(:data, :result)
9593
},
9694
last_update: last_update
9795
}

0 commit comments

Comments
 (0)