Skip to content

Commit 56f4841

Browse files
AriaXLicthorn42
authored andcommitted
(FACT-3497) Facter should not fail when binary data is detected
This commit updates LegacyFacter::Util::Normalization.normalize_string to accept fact values with ASCII 8-BIT without immediately failing. When a fact value with ASCII 8-BIT is detected but can be force encoded to UTF-8, Facter will log a debug message with the fact name and value. Facter will only error (with the fact name & value) when the fact value cannot be successfully force encoded to UTF-8. This commit also updates all the normalize methods in LegacyFacter::Util::Normalization to take in 2 parameters now: the fact name & value so the debug and error messages can include the fact name. This helps users more easily figure out which fact is causing issues and has binary.
1 parent d0db933 commit 56f4841

File tree

5 files changed

+58
-32
lines changed

5 files changed

+58
-32
lines changed

lib/facter/custom_facts/core/resolvable.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ def value
7979
end
8080
end
8181

82-
LegacyFacter::Util::Normalization.normalize(result)
82+
LegacyFacter::Util::Normalization.normalize(@fact.name, result)
8383
rescue Timeout::Error => e
8484
Facter.log_exception(e, "Timed out after #{limit} seconds while resolving #{qualified_name}")
8585

lib/facter/custom_facts/util/fact.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ class Fact
3535
#
3636
# @api private
3737
def initialize(name, options = {})
38-
@name = LegacyFacter::Util::Normalization.normalize(name.to_s.downcase).intern
38+
@name = LegacyFacter::Util::Normalization.normalize(name.to_s.downcase, name.to_s.downcase).intern
3939

4040
@options = options.dup
4141
extract_ldapname_option!(options)

lib/facter/custom_facts/util/normalization.rb

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -17,18 +17,18 @@ class NormalizationError < StandardError; end
1717
# @api public
1818
# @raise [NormalizationError] If the data structure contained an invalid element.
1919
# @return [void]
20-
def normalize(value)
20+
def normalize(fact_name, value)
2121
case value
2222
when Integer, Float, TrueClass, FalseClass, NilClass, Symbol
2323
value
2424
when Date, Time
2525
value.iso8601
2626
when String
27-
normalize_string(value)
27+
normalize_string(fact_name, value)
2828
when Array
29-
normalize_array(value)
29+
normalize_array(fact_name, value)
3030
when Hash
31-
normalize_hash(value)
31+
normalize_hash(fact_name, value)
3232
else
3333
raise NormalizationError, "Expected #{value} to be one of #{VALID_TYPES.inspect}, but was #{value.class}"
3434
end
@@ -47,12 +47,17 @@ def normalize(value)
4747
# @param value [String]
4848
# @return [void]
4949

50-
def normalize_string(value)
50+
def normalize_string(fact_name, value)
5151
if value.encoding == Encoding::ASCII_8BIT
52-
raise NormalizationError, "String #{value.inspect} contains binary data"
53-
end
52+
value_copy_encoding = value.dup.force_encoding(Encoding::UTF_8)
53+
raise NormalizationError, "custom fact \"#{fact_name}\" with value: #{value_copy_encoding} contains binary data and could not be force encoded to UTF-8" unless value_copy_encoding.valid_encoding?
54+
55+
log.debug("custom fact \"#{fact_name}\" with value: #{value} contained binary data and was force encoded to UTF-8 and now has the value: #{value_copy_encoding}")
56+
value = value_copy_encoding
5457

55-
value = value.encode(Encoding::UTF_8)
58+
else
59+
value = value.encode(Encoding::UTF_8)
60+
end
5661

5762
unless value.valid_encoding?
5863
raise NormalizationError, "String #{value.inspect} doesn't match the reported encoding #{value.encoding}"
@@ -73,9 +78,9 @@ def normalize_string(value)
7378
# @raise [NormalizationError] If one of the elements failed validation
7479
# @param value [Array]
7580
# @return [void]
76-
def normalize_array(value)
81+
def normalize_array(fact_name, value)
7782
value.collect do |elem|
78-
normalize(elem)
83+
normalize(fact_name, elem)
7984
end
8085
end
8186

@@ -85,8 +90,12 @@ def normalize_array(value)
8590
# @raise [NormalizationError] If one of the keys or values failed normalization
8691
# @param value [Hash]
8792
# @return [void]
88-
def normalize_hash(value)
89-
Hash[value.collect { |k, v| [normalize(k), normalize(v)] }]
93+
def normalize_hash(fact_name, value)
94+
Hash[value.collect { |k, v| [normalize(fact_name, k), normalize(fact_name, v)] }]
95+
end
96+
97+
def log
98+
Facter::Log.new(self)
9099
end
91100
end
92101
end

lib/facter/framework/core/fact_manager.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ def resolve_core(user_query = [], options = {})
8080

8181
def validate_resolved_facts(resolved_facts)
8282
resolved_facts.each do |fact|
83-
normalize(fact.value)
83+
normalize(fact.name, fact.value)
8484
rescue LegacyFacter::Util::Normalization::NormalizationError => e
8585
detail = e.message
8686
detail += "\n#{e.backtrace.join("\n")}" if @options[:trace]

spec/custom_facts/util/normalization_spec.rb

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -23,48 +23,65 @@ def utf8(str)
2323
describe 'and string encoding is supported', if: String.instance_methods.include?(:encoding) do
2424
it 'accepts strings that are ASCII and match their encoding and converts them to UTF-8' do
2525
str = 'ASCII'.encode(Encoding::ASCII)
26-
normalized_str = normalization.normalize(str)
26+
normalized_str = normalization.normalize('', str)
2727
expect(normalized_str.encoding).to eq(Encoding::UTF_8)
2828
end
2929

30+
it 'accepts ASCII 8-BIT (binary) string that can be successfully force encoded to UTF-8 and logs a debug message with the fact name and value and returns UTF-8 encoded version of the passed in string' do
31+
str = 'test'.encode(Encoding::ASCII_8BIT)
32+
logger = Facter::Log.new(self)
33+
allow(normalization).to receive(:log).and_return logger # rubocop:disable RSpec/SubjectStub
34+
expect(logger).to receive(:debug).with(/custom fact "" with value: #{str} contained binary data and was force encoded to UTF-8 and now has the value: #{str}/)
35+
value = normalization.normalize('', str)
36+
expect(value).to eq('test')
37+
expect(value.encoding).to eq(Encoding::UTF_8)
38+
end
39+
40+
it 'errors when given binary string that cannot be successfully force encoded to UTF-8' do
41+
str = "\xc0".dup.force_encoding(Encoding::ASCII_8BIT) # rubocop:disable Performance/UnfreezeString
42+
expect do
43+
normalization.normalize('', str)
44+
end.to raise_error(LegacyFacter::Util::Normalization::NormalizationError)
45+
end
46+
3047
it 'accepts strings that are UTF-8 and match their encoding' do
3148
str = "let's make a ☃!".encode(Encoding::UTF_8)
32-
expect(normalization.normalize(str)).to eq(str)
49+
expect(normalization.normalize('', str)).to eq(str)
3350
end
3451

3552
it 'converts valid non UTF-8 strings to UTF-8' do
3653
str = "let's make a ☃!".encode(Encoding::UTF_16LE)
37-
enc = normalization.normalize(str).encoding
54+
enc = normalization.normalize('', str).encoding
3855
expect(enc).to eq(Encoding::UTF_8)
3956
end
4057

4158
it 'normalizes a frozen string returning a non-frozen string' do
4259
str = 'factvalue'.encode(Encoding::UTF_16LE).freeze
4360

44-
normalized_str = normalization.normalize(str)
61+
normalized_str = normalization.normalize('', str)
4562
expect(normalized_str).not_to be_frozen
4663
end
4764

4865
it 'rejects strings that are not UTF-8 and do not match their claimed encoding' do
4966
invalid_shift_jis = (+"\xFF\x5C!").force_encoding(Encoding::SHIFT_JIS)
5067
expect do
51-
normalization.normalize(invalid_shift_jis)
68+
normalization.normalize('', invalid_shift_jis)
5269
end.to raise_error(LegacyFacter::Util::Normalization::NormalizationError,
5370
/String encoding Shift_JIS is not UTF-8 and could not be converted to UTF-8/)
5471
end
5572

5673
it "rejects strings that claim to be UTF-8 encoded but aren't" do
5774
str = (+"\255ay!").force_encoding(Encoding::UTF_8)
5875
expect do
59-
normalization.normalize(str)
76+
normalization.normalize('', str)
6077
end.to raise_error(LegacyFacter::Util::Normalization::NormalizationError,
6178
/String "\\xADay!" doesn't match the reported encoding UTF-8/)
6279
end
6380

6481
it 'rejects strings that only have the start of a valid UTF-8 sequence' do
6582
str = "\xc3\x28"
6683
expect do
67-
normalization.normalize(str)
84+
normalization.normalize('', str)
6885
end.to raise_error(LegacyFacter::Util::Normalization::NormalizationError,
6986
/String "\\xC3\(" doesn't match the reported encoding UTF-8/)
7087
end
@@ -73,7 +90,7 @@ def utf8(str)
7390
str = 'Mitteleuropäische Zeit'.encode(Encoding::UTF_16LE)
7491
str.force_encoding(Encoding::UTF_8)
7592
expect do
76-
normalization.normalize(str)
93+
normalization.normalize('', str)
7794
end.to raise_error(LegacyFacter::Util::Normalization::NormalizationError,
7895
/String.*doesn't match the reported encoding UTF-8/)
7996
end
@@ -85,7 +102,7 @@ def utf8(str)
85102
"null byte \x00 in the middle"]
86103
test_strings.each do |str|
87104
expect do
88-
normalization.normalize(str)
105+
normalization.normalize('', str)
89106
end.to raise_error(LegacyFacter::Util::Normalization::NormalizationError,
90107
/String.*contains a null byte reference/)
91108
end
@@ -95,13 +112,13 @@ def utf8(str)
95112
describe 'and string encoding is not supported', unless: String.instance_methods.include?(:encoding) do
96113
it 'accepts strings that are UTF-8 and match their encoding' do
97114
str = "let's make a ☃!"
98-
expect(normalization.normalize(str)).to eq(str)
115+
expect(normalization.normalize('', str)).to eq(str)
99116
end
100117

101118
it 'rejects strings that are not UTF-8' do
102119
str = "let's make a \255\255\255!"
103120
expect do
104-
normalization.normalize(str)
121+
normalization.normalize('', str)
105122
end.to raise_error(LegacyFacter::Util::Normalization::NormalizationError, /String .* is not valid UTF-8/)
106123
end
107124
end
@@ -112,7 +129,7 @@ def utf8(str)
112129
arr = [utf16('first'), utf16('second'), [utf16('third'), utf16('fourth')]]
113130
expected_arr = [utf8('first'), utf8('second'), [utf8('third'), utf8('fourth')]]
114131

115-
expect(normalization.normalize_array(arr)).to eq(expected_arr)
132+
expect(normalization.normalize_array('', arr)).to eq(expected_arr)
116133
end
117134
end
118135

@@ -121,7 +138,7 @@ def utf8(str)
121138
hsh = { utf16('first') => utf16('second'), utf16('third') => [utf16('fourth'), utf16('fifth')] }
122139
expected_hsh = { utf8('first') => utf8('second'), utf8('third') => [utf8('fourth'), utf8('fifth')] }
123140

124-
expect(normalization.normalize_hash(hsh)).to eq(expected_hsh)
141+
expect(normalization.normalize_hash('', hsh)).to eq(expected_hsh)
125142
end
126143
end
127144

@@ -131,7 +148,7 @@ def utf8(str)
131148
value = Time.utc(2020)
132149
expected = '2020-01-01T00:00:00Z'
133150

134-
expect(normalization.normalize(value)).to eq(expected)
151+
expect(normalization.normalize('', value)).to eq(expected)
135152
end
136153
end
137154

@@ -140,20 +157,20 @@ def utf8(str)
140157
value = Date.new(2020)
141158
expected = '2020-01-01'
142159

143-
expect(normalization.normalize(value)).to eq(expected)
160+
expect(normalization.normalize('', value)).to eq(expected)
144161
end
145162
end
146163

147164
[1, 1.0, true, false, nil, :symbol].each do |val|
148165
it "accepts #{val.inspect}:#{val.class}" do
149-
expect(normalization.normalize(val)).to eq(val)
166+
expect(normalization.normalize('', val)).to eq(val)
150167
end
151168
end
152169

153170
[Object.new, Set.new].each do |val|
154171
it "rejects #{val.inspect}:#{val.class}" do
155172
expect do
156-
normalization.normalize(val)
173+
normalization.normalize('', val)
157174
end.to raise_error(LegacyFacter::Util::Normalization::NormalizationError, /Expected .*but was #{val.class}/)
158175
end
159176
end

0 commit comments

Comments
 (0)