(import array)

;;? Lazily decode a string into its Unicode scalar values (code points, i.e.
;;? UTF-32 values).
;;?
;;? A `Str` is UTF-8, so the returned iterator effectively performs the
;;? UTF-8 to UTF-32 conversion on the fly, one code point per step.
;;?
;;? Note that a code point may not match your idea of what a character is.
;;? Iterating over grapheme clusters may be what you really want instead.
(define: (string/codepoints s)
    (Fun Str (Iter Nat32))
  ;; Split a leading byte into its payload bits and the number of
  ;; continuation bytes expected after it (0 to 3), decided by the first
  ;; clear bit among the top bits of the byte.
  (define: (init-byte lead)
      (Fun Nat8 [Nat8 Nat])
    (if (= (cast 0) (bit-and lead (cast 0b10000000)))
        [lead (cast 0)]
      (if (= (cast 0) (bit-and lead (cast 0b00100000)))
          [(bit-and lead (cast 0b00011111)) (cast 1)]
        (if (= (cast 0) (bit-and lead (cast 0b00010000)))
            [(bit-and lead (cast 0b00001111)) (cast 2)]
          [(bit-and lead (cast 0b00000111)) (cast 3)]))))
  ;; Keep only the low six bits of a continuation byte.
  (define: (cont-byte follow)
      (Fun Nat8 Nat8)
    (bit-and (cast 0b00111111) follow))
  ;; Append the six payload bits of `follow` to the code point built so far.
  (define: (join acc follow)
      (Fun Nat32 Nat8 Nat32)
    (+ (shift-l (cast acc) (cast 6))
       (cast (cont-byte follow))))
  ;; Wrap a byte iterator in a code point iterator: each step consumes one
  ;; leading byte plus the continuation bytes it announces.
  (define (go stream)
    (Iter (fun (_)
            (maybe/map (fun ([lead tail])
                         (let1 [payload extra] (init-byte lead)
                           [(foldl join (cast payload) (take (cast extra) tail))
                            (go (skip (cast extra) tail))]))
                       (next stream)))))
  (go (string/bytes s)))

;;? Iterate over the individual bytes of a string.
;;?
;;? Note that a single byte only represents a whole "character" if your UTF-8
;;? string is actually restricted to the ASCII subset.
(define (string/bytes (Str arr))
  (array/iter arr))

;;? Show an integer in hexadecimal notation, prefixed with "0x" and using
;;? lowercase digits. Zero is shown as "0x0".
(define: (show-hex n)
    (Fun Int Str)
  ;; Produce the ASCII hex digits of `m`, least significant digit first.
  ;; NOTE(review): termination for negative input depends on the semantics
  ;; of `shift-r`, which is not visible here -- confirm.
  (define: (digits m)
      (Fun Int (Iter Nat8))
    (if (= m 0)
        iter/nil
      (let1 nibble (bit-and 0b1111 m)
        ;; 48 is ASCII '0', 97 is ASCII 'a'.
        (let1 ascii (if (< nibble 0xa)
                        (+ 48 nibble)
                      (+ 97 (- nibble 0xa)))
          (iter/cons (cast ascii) (digits (shift-r m 4)))))))
  (if (= n 0)
      "0x0"
    (str-append "0x" (Str (array/collect (reverse (digits n)))))))

;; NOTE(review): the next line is reproduced verbatim. It appears to be
;; truncated: the body of `lines'` and the beginning of what looks like a
;; `digit?` definition are missing, and its parentheses are unbalanced.
;; Restore it from the original source before relying on this file.
(define: (lines (Str s)) (Fun Str (Iter Str)) (define (lines' s) (Iter (fun (Unit) (maybe/map (fun (i) (map-two Str (= c ascii-0) (<= c ascii-9)))

;; Convert a digit character to its numeric value: `Some` of the offset from
;; `ascii-0` when `digit?` accepts the character, `None` otherwise.
(define (parse-digit ch)
  (if (digit? ch)
      (Some (- ch ascii-0))
    None))