Skip to content

Commit c868f28

Browse files
committed
Add limited caching of rules for expressions
Expression rules were doing DB lookups every time they were used. This caches rules just before the initial audit or a commit audit. This is weak caching and shouldn't affect the analysts ability to change rules on the fly. Highly scientific emperical studies show this change saves about eleventy billion DB lookups and keeps my laptop from getting so hot I can cook with it.
1 parent 390d7f8 commit c868f28

File tree

7 files changed

+73
-65
lines changed

7 files changed

+73
-65
lines changed

app/models/rules.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ def validate
3737
begin
3838
Boolean.parse(dummy)
3939

40-
exp = ExpressionRule.new(value)
40+
exp = ExpressionRule.new(value, [])
4141
exp.rule_names.each do |exp_rule_name|
4242
next if Rules[name: exp_rule_name]
4343
errors.add(:value, "referenced rule does not exist: #{exp_rule_name}")

app/workers/commit_auditor.rb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,13 @@ def perform(project_id, commit, rules, github_token)
3030
diff = get_commit_diff(commit, rules, github_token)
3131

3232
builder = AuditResultsBuilder.new
33-
record = builder.build(project_id, commit, rules, diff)
33+
results = builder.build(project_id, commit, diff, rules)
3434
return unless record
3535

3636
begin
37-
Commits.insert(record)
37+
Commits.insert(results)
3838
rescue Sequel::UniqueConstraintViolation
39-
Rails.logger.debug "Dropping duplicate commit: #{record}"
39+
Rails.logger.debug "Dropping duplicate commit: #{results}"
4040
end
4141
end
4242

app/workers/initial_auditor.rb

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ def perform(project_id, project_name, rules)
3131
tmpdir = Dir.mktmpdir(['cwatcher', project_name.sub('/', '-')])
3232
commits = []
3333
begin
34-
git = clone(project_name, tmpdir)
3534
builder = AuditResultsBuilder.new
35+
git = clone(project_name, tmpdir)
3636
# Log helpfully forces a limit which defaults to 30.
3737
git_commits = git.log(100000000)
3838
Rails.logger.debug "Collected #{git_commits.size} commits from #{project_name}"
@@ -53,13 +53,13 @@ def perform(project_id, project_name, rules)
5353

5454
commit = build_commit_hash(c)
5555
commits << commit
56-
record = builder.build(project_id, commit, rules, diff)
57-
next unless record
56+
results = builder.build(project_id, commit, diff, rules)
57+
next unless results
5858

5959
begin
60-
Commits.create(record)
60+
Commits.create(results)
6161
rescue Sequel::UniqueConstraintViolation
62-
Rails.logger.debug "Dropping duplicate commit: #{record}"
62+
Rails.logger.debug "Dropping duplicate commit: #{results}"
6363
end
6464
end
6565
ensure

app/workers/project_enqueuer.rb

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ def perform
4141

4242
def enqueue_projects(projects, audit_frequency, github_token)
4343
projects.each do |project|
44-
rule_sets = get_rules(JSON.parse(project[:rule_sets]))
44+
rules = get_rules(JSON.parse(project[:rule_sets]))
4545

4646
# Update next_audit immediately to avoid re-enqueueing.
4747
next_audit = Time.now.to_i + audit_frequency
@@ -57,14 +57,14 @@ def enqueue_projects(projects, audit_frequency, github_token)
5757
InitialAuditor.perform_async(
5858
project[:id],
5959
project[:name],
60-
rule_sets.to_json
60+
rules.to_json
6161
)
6262
else
6363
CommitCollector.perform_async(
6464
project[:id],
6565
project[:name],
6666
last_commit_time,
67-
rule_sets.to_json,
67+
rules.to_json,
6868
github_token
6969
)
7070
end
@@ -74,6 +74,7 @@ def enqueue_projects(projects, audit_frequency, github_token)
7474
def get_rules(rule_set_names)
7575
rule_sets = RuleSets.where(name: rule_set_names).to_hash
7676
rule_names = rule_sets.values.collect { |s| JSON.parse(s[:rules]) }.flatten.sort.uniq
77+
7778
Rules.where(name: rule_names).select(:id, :name, :rule_type_id, :value, :notification_id)
7879
end
7980
end

lib/audit_results_builder.rb

Lines changed: 31 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3,33 +3,38 @@
33
require_relative "#{Rails.root}/lib/rules/rule_auditor"
44

55
class AuditResultsBuilder
6-
def build(project_id, commit, rules, diff)
7-
audit_results = []
8-
rules.each do |r|
9-
audit_result = RuleAuditor.audit(
10-
commit,
11-
r[:rule_type_id],
12-
r[:value],
13-
diff
14-
)
15-
next unless audit_result
6+
def initialize
7+
@all_rules = Rules.collect { |r| r.to_hash }
8+
end
169

17-
audit_results << {
18-
rule_id: r[:id],
19-
rule_name: r[:name],
20-
rule_type_id: r[:rule_type_id],
21-
rule_value: r[:value],
22-
notification_id: r[:notification_id],
23-
audit_result: audit_result,
24-
}
25-
end
26-
return if audit_results.empty?
10+
def build(project_id, commit, diff, rules)
11+
audit_results = []
12+
auditor = RuleAuditor.new(@all_rules)
13+
rules.each do |r|
14+
audit_result = auditor.audit(
15+
commit,
16+
r[:rule_type_id],
17+
r[:value],
18+
diff
19+
)
20+
next unless audit_result
2721

28-
{
29-
project_id: project_id,
30-
commit_hash: commit[:sha],
31-
commit_date: Time.iso8601(commit[:commit][:author][:date]),
32-
audit_results: audit_results.to_json,
33-
}
22+
audit_results << {
23+
rule_id: r[:id],
24+
rule_name: r[:name],
25+
rule_type_id: r[:rule_type_id],
26+
rule_value: r[:value],
27+
notification_id: r[:notification_id],
28+
audit_result: audit_result,
29+
}
3430
end
31+
return if audit_results.empty?
32+
33+
{
34+
project_id: project_id,
35+
commit_hash: commit[:sha],
36+
commit_date: Time.iso8601(commit[:commit][:author][:date]),
37+
audit_results: audit_results.to_json,
38+
}
39+
end
3540
end

lib/rules/expression_rule.rb

Lines changed: 14 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,14 @@
1-
class ExpressionRule
2-
attr_reader :rule_names
1+
require_relative 'rule_auditor'
32

4-
def initialize(expression)
3+
class ExpressionRule
4+
def initialize(expression, all_rules)
55
@expression = expression
6-
@rule_names = parse_rule_names
7-
@rules = nil
6+
@all_rules = all_rules
7+
@auditor = RuleAuditor.new(@all_rules)
8+
@rule_name_to_rule = Hash[@all_rules.map { |r| [r[:name], r] }]
89
end
910

1011
def evaluate(commit, diff)
11-
@rules ||= Rules.where(name: @rule_names)
12-
1312
all_results = []
1413

1514
if diff
@@ -25,13 +24,15 @@ def evaluate(commit, diff)
2524
all_results.empty? ? nil : all_results
2625
end
2726

28-
private
27+
def rule_names
28+
@expression.tr('!&|()', '').split(/\s+/).uniq.sort
29+
end
2930

3031
def evaluate_patch(commit, patch)
31-
# Todo change to collect
3232
results = []
33-
resolved_expr = "#{@expression}"
34-
@rules.each do |rule|
33+
resolved_expr = @expression.to_s
34+
rule_names.each do |rule_name|
35+
rule = @rule_name_to_rule[rule_name]
3536
resolved_expr, result = resolve_rule(rule, resolved_expr, commit, patch)
3637
results << result
3738
end
@@ -48,18 +49,14 @@ def resolve_rule(rule, resolved_expr, commit, patch)
4849
bool_value = !result.nil?
4950

5051
[
51-
resolved_expr.gsub(rule[:name], "#{bool_value}"),
52+
resolved_expr.gsub(rule[:name], bool_value.to_s),
5253
# Store result even if nil. Expression could be "!someRule"
5354
{ rule_name: rule[:name], result: result }
5455
]
5556
end
5657

57-
def parse_rule_names
58-
@expression.tr('!&|()', '').split(/\s+/).uniq.sort
59-
end
60-
6158
def evaluate_rule(rule, commit, diff)
62-
RuleAuditor.audit(
59+
@auditor.audit(
6360
commit,
6461
rule[:rule_type_id],
6562
rule[:value],

lib/rules/rule_auditor.rb

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
11
require_relative 'expression_rule'
22

33
class RuleAuditor
4-
# The actual content of the line is not exposed. *shrug*
4+
# The actual content of the line is not publicly exposed.
55
GitDiffParser::Line.class_eval { attr_reader :content }
66

7-
def self.audit(commit, rule_type_id, rule_value, diff)
7+
def initialize(all_rules)
8+
# Used for expression rules so it doesn't have to look them up
9+
@all_rules = all_rules
10+
end
11+
12+
def audit(commit, rule_type_id, rule_value, diff)
813
case rule_type_id
914
when 1
1015
return unless diff
@@ -28,13 +33,13 @@ def self.audit(commit, rule_type_id, rule_value, diff)
2833

2934
private
3035

31-
def self.audit_filename_pattern(pattern, diff)
36+
def audit_filename_pattern(pattern, diff)
3237
filenames = diff.collect { |e| e.file }
3338
results = filenames.select { |e| e =~ pattern }
3439
results.empty? ? nil : results
3540
end
3641

37-
def self.audit_changed_code_pattern(pattern, diff)
42+
def audit_changed_code_pattern(pattern, diff)
3843
results = []
3944
diff.each do |d|
4045
matches = d.body.scan(pattern)
@@ -81,7 +86,7 @@ def self.audit_changed_code_pattern(pattern, diff)
8186
results.empty? ? nil : results
8287
end
8388

84-
def self.audit_code_pattern(pattern, diff)
89+
def audit_code_pattern(pattern, diff)
8590
results = []
8691
diff.each do |d|
8792
matches = d.body.scan(pattern)
@@ -111,24 +116,24 @@ def self.audit_code_pattern(pattern, diff)
111116
results.empty? ? nil : results
112117
end
113118

114-
def self.audit_message_pattern(commit, pattern)
119+
def audit_message_pattern(commit, pattern)
115120
message = commit[:commit][:message]
116121
(message =~ pattern) ? message : nil
117122
end
118123

119-
def self.audit_author_pattern(commit, pattern)
124+
def audit_author_pattern(commit, pattern)
120125
author_name = commit[:commit][:author][:name]
121126
author_email = commit[:commit][:author][:email]
122127
author = "#{author_name} <#{author_email}>"
123128
(author =~ pattern) ? author : nil
124129
end
125130

126-
def self.audit_expression(commit, expression, diff)
127-
rule = ExpressionRule.new(expression)
131+
def audit_expression(commit, expression, diff)
132+
rule = ExpressionRule.new(expression, @all_rules)
128133
rule.evaluate(commit, diff)
129134
end
130135

131-
def self.audit_commit_pattern(commit, pattern, diff)
136+
def audit_commit_pattern(commit, pattern, diff)
132137
results = []
133138
results << audit_message_pattern(commit, pattern)
134139
results << audit_code_pattern(pattern, diff) if diff

0 commit comments

Comments
 (0)