~singpolyma/dhall-ruby

df3138fd2e83f0ba079b91fe923cdeab2c319281 — Stephen Paul Weber 5 years ago 5f39939
Update text-or-binary heuristic

The `cbor` gem is a bit janky and it turns out catching NoMemoryError is
not safe, so use an encoding-based heuristic to detect binary vs utf8
data.
2 files changed, 24 insertions(+), 10 deletions(-)

M lib/dhall.rb
M lib/dhall/util.rb
M lib/dhall.rb => lib/dhall.rb +5 -10
@@ 22,18 22,13 @@ module Dhall
	end

	def self.load_raw(source)
		unless source.valid_encoding?
			raise ArgumentError, "invalid byte sequence in #{source.encoding}"
		end
		source = Util.text_or_binary(source)

		begin
			return from_binary(source) if source.encoding == Encoding::BINARY
		rescue Exception # rubocop:disable Lint/RescueException
			# Parsing CBOR failed, so guess this is source text in standard UTF-8
			return load_raw(source.force_encoding("UTF-8"))
		if source.encoding == Encoding::BINARY
			from_binary(source)
		else
			Parser.parse(source).value
		end

		Parser.parse(source.encode("UTF-8")).value
	end

	def self.dump(o)

M lib/dhall/util.rb => lib/dhall/util.rb +19 -0
@@ 116,5 116,24 @@ module Dhall

			Hash[hash_or_not.map { |k, v| [(yield k), v] }]
		end

		# Relabels the bytes of +str+ as UTF-8 when they form a valid UTF-8
		# sequence; otherwise returns +str+ itself, unchanged.
		#
		# The argument is never mutated: the encoding is forced on a copy, and
		# that copy is what is returned on success.
		def self.utf8_if_possible(str)
			candidate = str.dup
			candidate.force_encoding(Encoding::UTF_8)
			return candidate if candidate.valid_encoding?

			str
		end

		# Heuristically classifies +str+ as text or binary data.
		#
		# * Raises ArgumentError when +str+ is not valid in its own encoding.
		# * A string in any encoding other than BINARY is transcoded to UTF-8.
		# * A BINARY string containing a control character that is not
		#   whitespace is returned as-is (still tagged BINARY).
		# * Any other BINARY string is passed to utf8_if_possible.
		def self.text_or_binary(str)
			unless str.valid_encoding?
				raise ArgumentError, "invalid byte sequence in #{str.encoding}"
			end

			binary = str.encoding == Encoding::BINARY
			return str.encode(Encoding::UTF_8) unless binary

			# The lookahead excludes whitespace, so tabs and newlines alone do
			# not mark the data as binary.
			control_byte_at = str =~ /(?!\s)[[:cntrl:]]/
			control_byte_at ? str : utf8_if_possible(str)
		end
	end
end