~cypheon/ocaml-apfs

61b44ed33d0b50533d003402e5de4e07aa8bafc6 — Johann Rudloff 1 year, 7 months ago 7615aa2
Do simple (ASCII-only) case folding when calculating file name hashes.
3 files changed, 24 insertions(+), 3 deletions(-)

M lib/apfs.ml
M lib/structs/types.ml
M lib/util.ml
M lib/apfs.ml => lib/apfs.ml +13 -1
@@ 546,7 546,19 @@ let lwt_main () =
  Format.printf "stat:\n%a\n" LoFS.pp_stat dir_stat;
  Lwt.return ()

(** [test_name_hash hashstr reference] prints the hash computed by
    [Util.name_hash] for [hashstr] next to the value expected from
    [reference], both masked to their low 22 bits.

    [reference] is shifted right by 10 bits before masking.
    NOTE(review): presumably [reference] is a raw [name_len_and_hash]
    field whose low 10 bits are not part of the hash -- confirm. *)
let test_name_hash hashstr reference =
  let hash_mask = 0x3fffffl in
  let actual = Int32.logand hash_mask (Util.name_hash hashstr) in
  let expected =
    Int32.logand hash_mask (Int32.shift_right_logical reference 10)
  in
  Printf.printf "hash of %s = 0x%08lx (should be 0x%08lx)\n" hashstr actual
    expected

(** Program entry point: runs [lwt_main] to completion, then prints
    name-hash self-checks for a handful of sample file names.

    Every reference passed to [test_name_hash] is an unshifted 32-bit
    value; [test_name_hash] itself discards the low 10 bits before
    comparing. *)
let apfs_main () =
  Lwt_main.run (lwt_main ());
  test_name_hash "include" 0x50c8a808l;
  test_name_hash "block" 0xfa85d006l;
  test_name_hash "README" 2114050055l;
  (* 0x28579008l shifted right by 10 is 660964, the value the old ad-hoc
     printf check expected. Passing 660964l directly would be shifted a
     second time and report a bogus expectation, so only the raw form is
     checked here. *)
  test_name_hash "xattr.c" 0x28579008l;
  test_name_hash ".gitattributes" 0xafb0fc0fl

M lib/structs/types.ml => lib/structs/types.ml +1 -1
@@ 858,7 858,7 @@ end
module FsObject = struct
  (* Key of a directory record. *)
  type drec_key = {
    (* Entry name. *)
    name: string;
    (* Declared exactly once: a record may not repeat a label. The custom
       printer renders the value as an 8-digit hex number followed by [l],
       i.e. in OCaml int32 literal form.
       NOTE(review): presumably packs the name length and the name hash
       into one 32-bit word -- confirm the bit layout. *)
    name_len_and_hash: int32 [@printer fun fmt -> fprintf fmt "0x%08lxl"];
  }
  [@@deriving show]


M lib/util.ml => lib/util.ml +10 -1
@@ 1,10 1,19 @@
(** [ascii_lowercase c] lowercases [c] with [Char.lowercase_ascii] when it
    fits in a [char] (as reported by [Uchar.is_char]); every other scalar
    value is returned unchanged. *)
let ascii_lowercase c =
  if not (Uchar.is_char c) then c
  else
    let folded = Char.lowercase_ascii (Uchar.to_char c) in
    Uchar.of_char folded
let name_hash str =
  let len = ref 0 in
  let buf = Cstruct.create_unsafe (4 * (1 +String.length str)) in
  let dec = Uutf.(decoder ~encoding:`UTF_8 (`String str)) in
  (* TODO: normalize to NFD *)
  let rec loop d = match Uutf.decode d with
  | `Uchar uc -> (Cstruct.LE.set_uint32 buf (4 * !len) (Int32.of_int (Uchar.to_int uc))); len := (!len + 1); loop d
  (* TODO: proper Unicode-aware case folding *)
  | `Uchar uc -> let lowercased = ascii_lowercase uc in
                 (Cstruct.LE.set_uint32 buf (4 * !len) (Int32.of_int (Uchar.to_int lowercased)));
                 (*Printf.printf "%d: %c (0x%x)\n" (!len) (Uchar.to_char uc) (Uchar.to_int uc);*)
                 len := (!len + 1);
                 loop d
  | `End -> !len
  | `Await -> assert false
  | `Malformed _ -> assert false