~sbaildon/lexical

bb1c1e3f66160653f95bd05af7589297233fec96 — Steve Cohen 3 months ago f114506
Fixing crash when dealing with unicode files (#672)

The change to binread introduced a fairly complicated unicode bug that
would cause decoding of messages to partially fail. This was due to
the fact that we were previously using read, which would occasionally
return unencoded data. We patched this by doing a utf8 -> latin1
conversion, but this would fail once we actually read the bytes rather
than going through Elixir's UTF-8-friendly IO.read functions.

The change here is to get rid of the encoding step, as we're reading
the raw bytes as we should.
1 files changed, 2 insertions(+), 5 deletions(-)

M apps/server/lib/lexical/server/transport/std_io.ex
M apps/server/lib/lexical/server/transport/std_io.ex => apps/server/lib/lexical/server/transport/std_io.ex +2 -5
@@ 13,7 13,7 @@ defmodule Lexical.Server.Transport.StdIO do
  end

  def init({callback, device}) do
    :io.setopts([:binary, encoding: :latin1])
    :io.setopts(binary: true, encoding: :latin1)
    loop([], device, callback)
  end



@@ 55,7 55,7 @@ defmodule Lexical.Server.Transport.StdIO do
  # private

  defp loop(buffer, device, callback) do
    case IO.read(device, :line) do
    case IO.binread(device, :line) do
      "\n" ->
        headers = parse_headers(buffer)



@@ 90,9 90,6 @@ defmodule Lexical.Server.Transport.StdIO do
  defp read_body(device, byte_count) do
    case IO.binread(device, byte_count) do
      data when is_binary(data) or is_list(data) ->
        # Ensure that incoming data is latin1 to prevent double-encoding to utf8 later
        # See https://github.com/lexical-lsp/lexical/issues/287 for context.
        data = :unicode.characters_to_binary(data, :utf8, :latin1)
        {:ok, data}

      other ->