Skip to content

Commit 991bf24

Browse files
committed
Use latest_merge_request_diff association
Compared to the merge_request_diff association: 1. It's simpler to query. The query uses a foreign key to the merge_request_diffs table, so no ordering is necessary. 2. It's faster for preloading. The merge_request_diff association has to load every diff for the MRs in the set, then discard all but the most recent for each. This association means that Rails can just query for N diffs from N MRs. 3. It's more complicated to update. This is a bidirectional foreign key, so we need to update two tables when adding a diff record. This also means we need to handle this as a special case when importing a GitLab project. There is some juggling with this association in the merge request model: * `MergeRequest#latest_merge_request_diff` is _always_ the latest diff. * `MergeRequest#merge_request_diff` reuses `MergeRequest#latest_merge_request_diff` unless: * Arguments are passed. These are typically to force-reload the association. * It doesn't exist. That means we might be trying to implicitly create a diff. This only seems to happen in specs. * The association is already loaded. This is important for the reasons explained in the comment, which I'll reiterate here: if we a) load a non-latest diff, then b) get its `merge_request`, then c) get that MR's `merge_request_diff`, we should get the diff we loaded in c), even though that's not the latest diff. Basically, `MergeRequest#merge_request_diff` is the latest diff in most cases, but not quite all.
1 parent e548c61 commit 991bf24

15 files changed

+197
-8
lines changed

app/controllers/concerns/issuable_collections.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ def preload_for_collection
150150
when 'MergeRequest'
151151
[
152152
:source_project, :target_project, :author, :assignee, :labels, :milestone,
153-
head_pipeline: :project, target_project: :namespace, merge_request_diff: :merge_request_diff_commits
153+
head_pipeline: :project, target_project: :namespace, latest_merge_request_diff: :merge_request_diff_commits
154154
]
155155
end
156156
end

app/models/ci/build.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ def merge_request
243243

244244
@merge_request ||=
245245
begin
246-
merge_requests = MergeRequest.includes(:merge_request_diff)
246+
merge_requests = MergeRequest.includes(:latest_merge_request_diff)
247247
.where(source_branch: ref,
248248
source_project: pipeline.project)
249249
.reorder(iid: :desc)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
module ManualInverseAssociation
2+
extend ActiveSupport::Concern
3+
4+
module ClassMethods
5+
def manual_inverse_association(association, inverse)
6+
define_method(association) do |*args|
7+
super(*args).tap do |value|
8+
next unless value
9+
10+
child_association = value.association(inverse)
11+
child_association.set_inverse_instance(self)
12+
child_association.target = self
13+
end
14+
end
15+
end
16+
end
17+
end

app/models/merge_request.rb

+37
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ class MergeRequest < ActiveRecord::Base
55
include Referable
66
include IgnorableColumn
77
include TimeTrackable
8+
include ManualInverseAssociation
9+
include EachBatch
810

911
ignore_column :locked_at,
1012
:ref_fetched
@@ -14,9 +16,28 @@ class MergeRequest < ActiveRecord::Base
1416
belongs_to :merge_user, class_name: "User"
1517

1618
has_many :merge_request_diffs
19+
1720
has_one :merge_request_diff,
1821
-> { order('merge_request_diffs.id DESC') }, inverse_of: :merge_request
1922

23+
belongs_to :latest_merge_request_diff, class_name: 'MergeRequestDiff'
24+
manual_inverse_association :latest_merge_request_diff, :merge_request
25+
26+
# This is the same as latest_merge_request_diff unless:
27+
# 1. There are arguments - in which case we might be trying to force-reload.
28+
# 2. This association is already loaded.
29+
# 3. The latest diff does not exist.
30+
#
31+
# The second one in particular is important - MergeRequestDiff#merge_request
32+
# is the inverse of MergeRequest#merge_request_diff, which means it may not be
33+
# the latest diff, because we could have loaded any diff from this particular
34+
# MR. If we haven't already loaded a diff, then it's fine to load the latest.
35+
def merge_request_diff(*args)
36+
fallback = latest_merge_request_diff if args.empty? && !association(:merge_request_diff).loaded?
37+
38+
fallback || super
39+
end
40+
2041
belongs_to :head_pipeline, foreign_key: "head_pipeline_id", class_name: "Ci::Pipeline"
2142

2243
has_many :events, as: :target, dependent: :destroy # rubocop:disable Cop/ActiveRecordDependent
@@ -167,6 +188,22 @@ def self.in_projects(relation)
167188
where("merge_requests.id IN (#{union.to_sql})") # rubocop:disable GitlabSecurity/SqlInjection
168189
end
169190

191+
# This is used after project import, to reset the IDs to the correct
192+
# values. It is not intended to be called without having already scoped the
193+
# relation.
194+
def self.set_latest_merge_request_diff_ids!
195+
update = '
196+
latest_merge_request_diff_id = (
197+
SELECT MAX(id)
198+
FROM merge_request_diffs
199+
WHERE merge_requests.id = merge_request_diffs.merge_request_id
200+
)'.squish
201+
202+
self.each_batch do |batch|
203+
batch.update_all(update)
204+
end
205+
end
206+
170207
WIP_REGEX = /\A\s*(\[WIP\]\s*|WIP:\s*|WIP\s+)+\s*/i.freeze
171208

172209
def self.work_in_progress?(title)

app/models/merge_request_diff.rb

+4-1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ class MergeRequestDiff < ActiveRecord::Base
22
include Sortable
33
include Importable
44
include Gitlab::EncodingHelper
5+
include ManualInverseAssociation
56

67
# Prevent store of diff if commits amount more then 500
78
COMMITS_SAFE_SIZE = 100
@@ -10,6 +11,8 @@ class MergeRequestDiff < ActiveRecord::Base
1011
VALID_CLASSES = [Hash, Rugged::Patch, Rugged::Diff::Delta].freeze
1112

1213
belongs_to :merge_request
14+
manual_inverse_association :merge_request, :merge_request_diff
15+
1316
has_many :merge_request_diff_files, -> { order(:merge_request_diff_id, :relative_order) }
1417
has_many :merge_request_diff_commits, -> { order(:merge_request_diff_id, :relative_order) }
1518

@@ -194,7 +197,7 @@ def compare
194197
end
195198

196199
def latest?
197-
self == merge_request.merge_request_diff
200+
self.id == merge_request.latest_merge_request_diff_id
198201
end
199202

200203
def compare_with(sha)

app/services/merge_requests/refresh_service.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def execute(oldrev, newrev, ref)
3535
# target branch manually
3636
def close_merge_requests
3737
commit_ids = @commits.map(&:id)
38-
merge_requests = @project.merge_requests.preload(:merge_request_diff).opened.where(target_branch: @branch_name).to_a
38+
merge_requests = @project.merge_requests.preload(:latest_merge_request_diff).opened.where(target_branch: @branch_name).to_a
3939
merge_requests = merge_requests.select(&:diff_head_commit)
4040

4141
merge_requests = merge_requests.select do |merge_request|
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
title: Make finding most recent merge request diffs more efficient
3+
merge_request:
4+
author:
5+
type: performance
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# This is identical to the stolen background migration, which already has specs.
2+
class PopulateMergeRequestsLatestMergeRequestDiffIdTakeTwo < ActiveRecord::Migration
3+
include Gitlab::Database::MigrationHelpers
4+
5+
DOWNTIME = false
6+
BATCH_SIZE = 1_000
7+
8+
class MergeRequest < ActiveRecord::Base
9+
self.table_name = 'merge_requests'
10+
11+
include ::EachBatch
12+
end
13+
14+
disable_ddl_transaction!
15+
16+
def up
17+
Gitlab::BackgroundMigration.steal('PopulateMergeRequestsLatestMergeRequestDiffId')
18+
19+
update = '
20+
latest_merge_request_diff_id = (
21+
SELECT MAX(id)
22+
FROM merge_request_diffs
23+
WHERE merge_requests.id = merge_request_diffs.merge_request_id
24+
)'.squish
25+
26+
MergeRequest.where(latest_merge_request_diff_id: nil).each_batch(of: BATCH_SIZE) do |relation|
27+
relation.update_all(update)
28+
end
29+
end
30+
end

lib/api/merge_requests.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ def find_merge_requests(args = {})
2121
return merge_requests if args[:view] == 'simple'
2222

2323
merge_requests
24-
.preload(:notes, :author, :assignee, :milestone, :merge_request_diff, :labels, :timelogs)
24+
.preload(:notes, :author, :assignee, :milestone, :latest_merge_request_diff, :labels, :timelogs)
2525
end
2626

2727
params :merge_requests_params do

lib/gitlab/import_export/project_tree_restorer.rb

+2
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,8 @@ def create_relations
6060
end
6161
end
6262

63+
@project.merge_requests.set_latest_merge_request_diff_ids!
64+
6365
@saved
6466
end
6567

spec/lib/gitlab/import_export/all_models.yml

+1
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ merge_requests:
8888
- metrics
8989
- timelogs
9090
- head_pipeline
91+
- latest_merge_request_diff
9192
merge_request_diff:
9293
- merge_request
9394
- merge_request_diff_commits

spec/lib/gitlab/import_export/project_tree_restorer_spec.rb

+6
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,12 @@
117117
expect(st_commits.first[:committed_date]).to be_kind_of(Time)
118118
end
119119

120+
it 'has the correct data for merge request latest_merge_request_diff' do
121+
MergeRequest.find_each do |merge_request|
122+
expect(merge_request.latest_merge_request_diff_id).to eq(merge_request.merge_request_diffs.maximum(:id))
123+
end
124+
end
125+
120126
it 'has labels associated to label links, associated to issues' do
121127
expect(Label.first.label_links.first.target).not_to be_nil
122128
end
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
require 'spec_helper'
2+
3+
describe ManualInverseAssociation do
4+
let(:model) do
5+
Class.new(MergeRequest) do
6+
belongs_to :manual_association, class_name: 'MergeRequestDiff', foreign_key: :latest_merge_request_diff_id
7+
manual_inverse_association :manual_association, :merge_request
8+
end
9+
end
10+
11+
before do
12+
stub_const("#{described_class}::Model", model)
13+
end
14+
15+
let(:instance) { create(:merge_request).becomes(model) }
16+
17+
describe '.manual_inverse_association' do
18+
context 'when the relation exists' do
19+
before do
20+
instance.create_merge_request_diff
21+
instance.reload
22+
end
23+
24+
it 'loads the relation' do
25+
expect(instance.manual_association).to be_an_instance_of(MergeRequestDiff)
26+
end
27+
28+
it 'does not perform extra queries after loading' do
29+
instance.manual_association
30+
31+
expect { instance.manual_association.merge_request }
32+
.not_to exceed_query_limit(0)
33+
end
34+
35+
it 'passes arguments to the default association method, to allow reloading' do
36+
query_count = ActiveRecord::QueryRecorder.new do
37+
instance.manual_association
38+
instance.manual_association(true)
39+
end.count
40+
41+
expect(query_count).to eq(2)
42+
end
43+
end
44+
45+
context 'when the relation does not return a value' do
46+
it 'does not try to set an inverse' do
47+
expect(instance.manual_association).to be_nil
48+
end
49+
end
50+
end
51+
end

spec/models/merge_request_diff_spec.rb

+3-3
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818
let!(:first_diff) { mr.merge_request_diff }
1919
let!(:last_diff) { mr.create_merge_request_diff }
2020

21-
it { expect(last_diff.latest?).to be_truthy }
22-
it { expect(first_diff.latest?).to be_falsey }
21+
it { expect(last_diff.reload).to be_latest }
22+
it { expect(first_diff.reload).not_to be_latest }
2323
end
2424

2525
describe '#diffs' do
@@ -29,7 +29,7 @@
2929
context 'when the :ignore_whitespace_change option is set' do
3030
it 'creates a new compare object instead of loading from the DB' do
3131
expect(mr_diff).not_to receive(:load_diffs)
32-
expect(Gitlab::Git::Compare).to receive(:new).and_call_original
32+
expect(mr_diff.compare).to receive(:diffs).and_call_original
3333

3434
mr_diff.raw_diffs(ignore_whitespace_change: true)
3535
end

spec/models/merge_request_spec.rb

+37
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,43 @@
7979
end
8080
end
8181

82+
describe '.set_latest_merge_request_diff_ids!' do
83+
def create_merge_request_with_diffs(source_branch, diffs: 2)
84+
params = {
85+
target_project: project,
86+
target_branch: 'master',
87+
source_project: project,
88+
source_branch: source_branch
89+
}
90+
91+
create(:merge_request, params).tap do |mr|
92+
diffs.times { mr.merge_request_diffs.create }
93+
end
94+
end
95+
96+
let(:project) { create(:project) }
97+
98+
it 'sets IDs for merge requests, whether they are already set or not' do
99+
merge_requests = [
100+
create_merge_request_with_diffs('feature'),
101+
create_merge_request_with_diffs('feature-conflict'),
102+
create_merge_request_with_diffs('wip', diffs: 0),
103+
create_merge_request_with_diffs('csv')
104+
]
105+
106+
merge_requests.take(2).each do |merge_request|
107+
merge_request.update_column(:latest_merge_request_diff_id, nil)
108+
end
109+
110+
expected = merge_requests.map do |merge_request|
111+
merge_request.merge_request_diffs.maximum(:id)
112+
end
113+
114+
expect { project.merge_requests.set_latest_merge_request_diff_ids! }
115+
.to change { merge_requests.map { |mr| mr.reload.latest_merge_request_diff_id } }.to(expected)
116+
end
117+
end
118+
82119
describe '#target_branch_sha' do
83120
let(:project) { create(:project, :repository) }
84121

0 commit comments

Comments
 (0)