~cypheon/ocaml-apfs

ref: 7615aa2c08b13051a09c4478826d44ea60bbe679 ocaml-apfs/lib/apfs.ml -rw-r--r-- 17.1 KiB
7615aa2c — Johann Rudloff Remove .merlin files. 1 year, 9 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
open Apfs_structs.Types
open Stdlib.Result

type container = {
  cnt_superblock: Apfs_structs.Types.Nx_superblock.t;
  cnt_omap: Apfs_structs.Types.OmapBtree.t;
}

let (let*) = Lwt.bind
let (let>) = Lwt_result.bind
let (let*>) = Lwt_result.bind_lwt

exception ChecksumError of int64

let rec addr_range start = function
  | 0 -> []
  | n -> start::Int64.(addr_range (succ start) (n - 1))

let max_by f a b =
  if (Stdlib.compare (f a) (f b)) > 0 then a else b

let list_max_by_opt f = function
  | [] -> None
  | x::xs -> Some (
    List.fold_left (max_by f) x xs
  )

let list_max_by f xs = list_max_by_opt f xs |> Option.get

let find_latest_sb_obj sb_objs =
  List.filter_map (
    function
      | (obj_phys, `NX_SUPERBLOCK sb) -> Some (obj_phys, sb)
      | _ -> None
  ) sb_objs
  |> list_max_by (fun obj -> (fst obj).Obj_phys.o_xid)

exception NotImplemented of string

module type BlockDevice = sig
  type t
  val read_block: t -> ?size:int -> int64 -> Cstruct.t Lwt.t
end

module type ObjectPool = sig
  type t
  type omap_t

  val read_object: t -> ?size:int -> Apfs_structs.Types.any_oid -> Apfs_structs.Types.Object.t Lwt.t
  val read_block: t -> ?size:int -> Apfs_structs.Types.any_oid -> (Obj_phys.t * Cstruct.t) Lwt.t
end

module DeviceUtils (B : BlockDevice) = struct
  include B

  let read_obj_raw dev ?size addr =
    let* block = B.read_block dev ?size:size addr in
    let checksum_buf = Cstruct.(shift block 8 |> to_bigarray) in
    let f64 = Fletcher.fletcher64 checksum_buf in
    let obj_header = Obj_phys.of_buffer block in
    if obj_header.o_cksum = f64 then (Lwt.return (ok (obj_header, block)))
    else Lwt.return (error (ChecksumError addr))

  let read_obj_raw_raise dev ?size addr =
    let* result = read_obj_raw dev ?size:size addr in
    match result with
    | Ok x ->
      (*Printf.printf "read raw obj: (phys block %Ld) %s\n" addr (Obj_phys.show (Obj_phys.of_buffer (snd x)));*)
      Lwt.return x
    | Error e -> raise e

  let read_obj_raise dev ?size addr =
    let* result = read_obj_raw dev ?size:size addr in
    match result with
    | Ok (hdr, buf) -> 
      let obj = obj_of_buffer hdr buf in
      (*Printf.printf "read obj: (phys block %Ld) %s\n" addr (Obj_phys.show hdr);*)
      Lwt.return obj
    | Error e -> raise e
end

module BtreeTraversal (NodeType : Apfs_structs.Types.Btree_node_S) (P : ObjectPool) = struct
  type t = {
    pool: P.t;
    btree_info: Apfs_structs.Types.BtreeMeta.info;
    btree_root: NodeType.t;

    lower_bound: NodeType.k option;
    upper_bound: NodeType.k option;
  }

  let create pool (info, root) = {
    pool;
    btree_info = info;
    btree_root = root;
    lower_bound = None;
    upper_bound = None;
  }

  let limit ?lower_bound ?upper_bound tree = {
    tree with
    lower_bound;
    upper_bound;
  }

  let lookup_floor tree (key:NodeType.k) : (NodeType.k * NodeType.v) option Lwt.t =
    let pool = tree.pool in
    let info = tree.btree_info in
    let rec go bt = begin
      (*Format.printf "begin traverse btree (find %a) in\n%a\n" (NodeType.pp_k) key NodeType.pp bt;*)
      match NodeType.floor_entry bt key with
      | None ->
        (*Format.printf "failed traverse btree (find %a) NOT FOUND in\n%a\n" (NodeType.pp_k) key NodeType.pp bt;*)
        Lwt.return None
      | Some (_k, ChildNode child_oid) ->
        (*Format.printf "traverse btree (find %a) in\n%a\n--->\n %a\n" (NodeType.pp_k) key NodeType.pp bt pp_any_oid child_oid;*)
          let* (_hdr, buf) = P.read_block pool ~size:info.BtreeMeta.bt_node_size child_oid in
        let _, node = NodeType.of_block info buf in
        (*Format.printf "child node read (%s):\n%a\n" (show_any_oid child_oid) NodeType.pp node;*)
        go node
      | Some (k, Value v) ->
          (*Format.printf "traverse btree (find %a) in\n%a\n--->\nReturn %a\n" NodeType.pp_k key NodeType.pp bt NodeType.pp_v v;*)
          Lwt.return (Some (k,v))
      end in
    go tree.btree_root

  let find tree (key:NodeType.k) : (NodeType.k * NodeType.v) option Lwt.t =
    let* floor_entry = lookup_floor tree key in
    match floor_entry with
    | None -> Lwt.return None
    | Some (k, _) -> Lwt.return (if NodeType.compare_key k key = 0 then floor_entry else None)

  let fold_s f acc tree =
    let pool = tree.pool in
    let info = tree.btree_info in
    let lower_bound = tree.lower_bound in
    let check_lower = match lower_bound with
      | None -> fun _ -> true
      | Some lower -> fun k -> (NodeType.compare_key k lower) >= 0
    in
    let upper_bound = tree.upper_bound in
    let check_upper = match upper_bound with
      | None -> fun _ -> true
      | Some upper -> fun k -> (NodeType.compare_key k upper) <= 0
    in
    let rec go = (fun bt acc ->
      let s = match lower_bound with
        | None -> NodeType.stream bt
        | Some l -> NodeType.stream_from l bt
      in
      let limited_stream () = match upper_bound with
        | None -> s ()
        | Some upper -> (match s () with
          | None -> None
          | Some (k, v) -> if check_upper k then Some (k, v) else None
        ) in
      let lwt_s = Lwt_stream.from_direct limited_stream in
      Lwt_stream.fold_s (fun (k,v) acc -> match v with
        | (Value v) ->
            if (check_lower k) && (check_upper k) then f (k,v) acc
            else Lwt.return acc
        | (ChildNode child_oid) ->
          (*Printf.printf "recurse: %s\n" (show_any_oid child_oid);*)
          let* (hdr, buf) = P.read_block pool ~size:info.BtreeMeta.bt_node_size child_oid in
          (*Format.printf "child obj read: %s = %a\n" (show_any_oid child_oid) Obj_phys.pp hdr;*)
          let _, node = NodeType.of_block info buf in
          (*Format.printf "go to child node: %a\n" NodeType.pp node;*)
          go node acc
      ) lwt_s acc
    ) in
    go tree.btree_root acc

  let fold f acc tree =
    let f' x a = Lwt.return (f x a) in
    fold_s f' acc tree

  let iter f tree =
    let f' x () = Lwt.return (f x) in
    fold_s f' () tree
end

module OmapTraversal = struct
  module type S = sig
    type t

    val lookup_phys_oid : t -> (int64 * int64) -> Omap_val.t option Lwt.t
  end

  module Make (P : ObjectPool) = struct
    include BtreeTraversal(OmapBtree_node)(P)

    type pool_t = P.t

    let lookup_phys_oid tree oid_xid =
      let* bt_lookup_result = lookup_floor tree oid_xid in
      match bt_lookup_result with
      | None -> Lwt.return None
      | Some ((k_oid, _k_xid), v) ->
          if k_oid = (fst oid_xid) then Lwt.return (Some v)
          else Lwt.return None
  end

end

module PhysObjectPool (B : BlockDevice) = struct
  module DU = DeviceUtils(B)

  type t = {
    dev: B.t;
  }
  type omap_t = unit

  let read_object pool ?size = function
    | Physical addr -> DU.read_obj_raise pool.dev ?size addr
    | Virtual addr -> failwith "this object pool does not support virtual adresses"
    | Ephemeral addr -> failwith "err ephem"

  let read_block pool ?size = function
    | Physical addr -> DU.read_obj_raw_raise pool.dev ?size addr
    | Virtual addr -> failwith "this object pool does not support virtual adresses"
    | Ephemeral addr -> failwith "err ephem"

  let create dev = {
    dev;
  }
end

(** High-level object pool with support for virtual addresses, layered on top
 * of a backing pool, which is used for Physical addresses *)
module VObjectPool (P : ObjectPool) = struct
  module O = OmapTraversal.Make(P)

  type t = {
    backing: P.t;
    omap: O.t;
    xid: xid_t;
  }
  type omap_t = O.t

  let read_object pool ?size = function
    | (Physical _) as addr -> P.read_object pool.backing ?size addr
    | Virtual addr ->
        (* TODO: use real xid *)
        let omap = pool.omap in
        let* phys_addr = O.lookup_phys_oid omap (addr, Int64.max_int) in
        (match phys_addr with
        | Some omap_v ->
            Option.iter (fun size -> assert (size=omap_v.ov_size)) size;
            P.read_object pool.backing ~size:omap_v.ov_size (Physical omap_v.ov_paddr)
        | None -> failwith (Printf.sprintf "virtual object not found: %Ld" (addr))
        )
    | Ephemeral addr -> failwith "err"

  let read_block pool ?size = function
    | (Physical _) as addr -> P.read_block pool.backing ?size addr
    | Virtual addr ->
        let omap = pool.omap in
        let* phys_addr = O.lookup_phys_oid omap (addr, pool.xid) in
        (match phys_addr with
        | Some omap_v ->
            Option.iter (fun size -> assert (size=omap_v.ov_size)) size;
            P.read_block pool.backing ~size:omap_v.ov_size (Physical omap_v.ov_paddr)
        | None -> failwith (Printf.sprintf "virtual object not found: %Ld" (addr))
        )
    | Ephemeral addr -> failwith "err"

  open Lwt.Infix

  let read_omap backing omap_phys_oid =
    let* omap_obj = P.read_object backing (Physical omap_phys_oid) >|= Object.to_omap_phys "object map tree" in
    let* omap_btree = P.read_object backing (Physical omap_obj.Omap_phys.om_tree_oid)
      >|= Object.to_omap_btree "object map btree" in
    Lwt.return (O.create backing omap_btree)

  let create backing omap xid = {
    backing; omap; xid;
  }

  let create_from_omap backing_pool omap_phys_oid xid =
    let* omap = read_omap backing_pool omap_phys_oid in
    Lwt.return (create backing_pool omap xid)
end

module FsTraversal (P : ObjectPool) = struct
  include BtreeTraversal(FsBtree_node)(P)
end

module LoopbackBlockDevice = struct
  type t = {
    file: Lwt_io.input_channel;
    blocksize: int;
    mutex: Lwt_mutex.t;
  }

  let create file blocksize = {
    file;
    blocksize;
    mutex = Lwt_mutex.create ();
  }

  let of_file filename ~readonly blocksize =
    assert readonly;
    let* imgfile = Lwt_unix.(openfile filename [O_RDONLY] 0o600) in
    let imgchan = Lwt_io.(of_fd ~mode:input imgfile) in
    Lwt.return (create imgchan blocksize)

  let read_block dev ?size (addr:int64) =
    let {file;blocksize;} = dev in
    let size = Option.value size ~default:blocksize in
    (*Printf.printf "reading block %Ld (%d) -> %d bytes\n" addr blocksize size;*)
    let buf = Bytes.create size in
    let offset = Int64.(mul addr (of_int blocksize)) in
    let* () = Lwt_mutex.with_lock dev.mutex (fun () ->
      let* () = Lwt_io.set_position file offset in
      (*Printf.printf "moved to pos %Ld\n" offset;*)
      let* () = Lwt_io.read_into_exactly file buf 0 size in
      (*Printf.printf "read from pos %Ld\n" offset;*)
      Lwt.return ()
    ) in
    (*Printf.printf "read block %Ld (%d) -> %d bytes\n" addr blocksize size;*)
    Lwt.return (Cstruct.of_bigarray (Lwt_bytes.of_bytes buf))

end

module Container = struct
  module type S = sig
    module P : ObjectPool
    module VP : ObjectPool

    type dev_t

    type t = {
      blocksize: int;
      cnt_phys_pool: P.t;
      cnt_pool: VP.t;
      superblock: Nx_superblock.t;
      current_xid: int64;
    }

    val connect : dev_t -> t Lwt.t
  end

  module Make (B : BlockDevice) : (S with type dev_t = B.t) = struct
    module B = B
    module P = PhysObjectPool(B)
    module VP = VObjectPool(P)
    module O = VP.O

    type dev_t = B.t

    type t = {
      blocksize: int;
      cnt_phys_pool: P.t;
      cnt_pool: VP.t;
      superblock: Nx_superblock.t;
      current_xid: int64;
    }

    let find_latest_superblock pool sb_blockzero =
      if Nx_superblock.checkpoint_desc_area_contiguous sb_blockzero then begin
        Printf.printf "reading checkpoint area\n";
        let* objs = Lwt_list.map_s
          (fun addr -> P.read_object pool (Physical addr))
          (addr_range sb_blockzero.nx_xp_desc_base (Int32.to_int sb_blockzero.nx_xp_desc_blocks))
        in
          Printf.printf "read objects from xp_desc area: %s\n" (String.concat "; " (List.map (Object.show) objs));
          Lwt_result.return (find_latest_sb_obj objs)
      end
      else failwith "checkpoint area is not contiguous"

    let connect dev =
      let* ba_buf = B.read_block dev 0L in
      let superblock = Nx_superblock.of_buffer ba_buf in
      Printf.printf "superblock: %s\n" (Nx_superblock.show superblock);
      assert (superblock.nx_block_size = 4096);
      let cnt_phys_pool = P.create dev in
      let* (sb_obj, latest_superblock) = find_latest_superblock cnt_phys_pool superblock |> Lwt_result.get_exn in
      let blocksize = latest_superblock.nx_block_size in
      let current_xid = sb_obj.o_xid in
      let* cnt_pool = VP.create_from_omap cnt_phys_pool latest_superblock.Nx_superblock.nx_omap_oid current_xid in
      Lwt.return {
        blocksize;
        cnt_phys_pool;
        cnt_pool;
        superblock = latest_superblock;
        current_xid;
      }

  end

end

module FileSystem = struct
  module type S = sig
    type error

    type t
    type cnt_t

    type vnode
    [@@deriving show]

    type stat = {
      st_size: int64;
      st_atime: int64;
      st_mtime: int64;
      st_ctime: int64;

      st_mode: int32;
      st_uid: int32;
      st_gid: int32;
    }
    [@@deriving show]

    val connect : cnt_t -> t Lwt.t

    val root : t -> vnode Lwt.t
    val listdir : t -> vnode -> (string list) Lwt.t
    val lookup : t -> vnode -> string -> (vnode, error) Lwt_result.t
    val stat : t -> vnode -> (stat, error) Lwt_result.t

    val test : t -> unit Lwt.t
  end

  module Make (C : Container.S) : (S with type cnt_t = C.t) = struct
    module VP = VObjectPool(C.P)
    module FS = FsTraversal(VP)

    open Lwt.Infix

    type cnt_t = C.t
    type t = {
      fs_cnt: C.t;
      fs_pool: VP.t;
      fs_root: FS.t;
      fs_superblock: Apfs_superblock.t;
    }

    type vnode = int64
    [@@deriving show]

    type stat = {
      st_size: int64;
      st_atime: int64;
      st_mtime: int64;
      st_ctime: int64;

      st_mode: int32;
      st_uid: int32;
      st_gid: int32;
    }
    [@@deriving show]

    type error = [
      | `No_directory_entry
      ]

    let connect cnt =
      let fs0_oid = Virtual (List.hd cnt.C.superblock.nx_fs_oid) in
      let* fs_hdr, sb = C.VP.read_object cnt.cnt_pool fs0_oid >|= Object.to_fs "fs root" in
      let fs_root_tree_oid = Virtual sb.Apfs_superblock.apfs_root_tree_oid in
      let fs_current_xid = fs_hdr.o_xid in
      let* fs_pool = VP.create_from_omap cnt.cnt_phys_pool sb.apfs_omap_oid fs_current_xid in
      let* root_obj = VP.read_object fs_pool fs_root_tree_oid >|= Object.to_fs_btree "fs btree" in
      Format.printf "root_tree object: %s\n" (Apfs_structs.Types.FsBtree.show root_obj);
      Lwt.return {
        fs_cnt = cnt;
        fs_pool;
        fs_root = FS.create fs_pool root_obj;
        fs_superblock = sb;
      }

    let read fs fname _offset _length =
      Lwt.return (Ok [])

    let private_root fs =
      Lwt.return 1L

    let make_drec_key inode name =
      ((inode, FsObject.type_dir_rec),
      FsObject.DirRec {name; name_len_and_hash = Util.name_len_and_hash name}
      )

    let lookup fs dir name =
      let lookup_key = make_drec_key dir name in
      Format.printf "looking for: %a\n" FsBtree_key.pp lookup_key;
      let* lookup_result = FS.find fs.fs_root lookup_key in
      match lookup_result with
        | Some (_, (DirRec dr)) -> Lwt_result.return dr.file_id
        | Some (k, v) -> Format.printf "other: (%a, %a)\n" FsBtree_key.pp k FsBtree_val.pp v;Lwt_result.fail `No_directory_entry
        | _ -> Format.printf "Nothing found\n";Lwt_result.fail `No_directory_entry

    let root fs =
      let* priv_root = private_root fs in
      let* real_root = lookup fs priv_root "root" in
      match real_root with
        | Ok inode -> Lwt.return inode
        | _ -> failwith "unable to find root"

    let stat fs inode =
      let lookup_key = ((inode, FsObject.type_inode), FsObject.Inode inode) in
      let* lookup_result = FS.find fs.fs_root lookup_key in
      match lookup_result with
        | Some (_, (Inode v)) -> Format.printf "other: %a\n" FsBtree_val.pp (Inode v);Lwt_result.return {
            st_size = 0L;
            st_atime = 0L;
            st_mtime = 0L;
            st_ctime = 0L;
            st_mode = 0l;
            st_uid = 0l;
            st_gid = 0l;
          }
        | Some (k, v) -> Lwt_result.fail `No_directory_entry
        | _ -> Lwt_result.fail `No_directory_entry

    let listdir fs dir =
      Lwt.return []

    let test fs =
      let sub_fs = FS.limit
        ~lower_bound:((0L,0), FsObject.Invalid)
        ~upper_bound:((2L,99), FsObject.InvalidMax)
        fs.fs_root in
      let* all_entries = FS.fold (
        fun (k,v) l -> ((k,v)::l)
      ) [] fs.fs_root in
      let () = List.iter (fun (k,v) -> Format.printf "=========================\n%a =>\n %a\n" FsBtree_key.pp k FsBtree_val.pp v) (List.rev all_entries) in
      Lwt.return ()
  end
end

module LoCnt = Container.Make(LoopbackBlockDevice)
module LoFS = FileSystem.Make(LoCnt)

open Lwt.Infix

let lwt_main () =
  let* dev = LoopbackBlockDevice.of_file "bigtest1g.img" 4096 ~readonly:true in
  let* container = LoCnt.connect dev in
  let* fs = LoFS.connect container in
  let* () = LoFS.test fs in
  let* root_inode = LoFS.root fs in
  let* dir_inode = LoFS.lookup fs root_inode "linux-5.3-rc8" >|= get_ok in
  Format.printf "root_inode: %a / dir_inode: %a\n" LoFS.pp_vnode root_inode LoFS.pp_vnode dir_inode;
  let* dir_stat = LoFS.stat fs dir_inode >|= get_ok in
  Format.printf "stat:\n%a\n" LoFS.pp_stat dir_stat;
  Lwt.return ()

let apfs_main () =
  let hashstr = "xattr.c" in
  Lwt_main.run (lwt_main ());
  Printf.printf "hash of %s = 0x%08lx (should be 0x%08x)" hashstr (Int32.logand 0x3fffffl (Util.name_hash hashstr)) 660964