~jojo/Carth

ref: 749208029494a7c48ce04444a513424f4b998416 Carth/std/string.carth -rw-r--r-- 3.4 KiB
74920802JoJo update TODO 6 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
(import array)

;;? Iterate over the unicode scalar values / code points / UTF-32 values of the
;;? string.
;;?
;;? A `Str` is UTF-8, so this function essentially converts from UTF-8 to UTF-32
;;? on the fly in the form of a lazy iterator.
;;?
;;? Note that a code point may not match your idea of what a character
;;? is. Iteration over grapheme clusters may instead be what you really want.
(define: (string/codepoints s)
    (Fun Str (Iter Nat32))
  (define: (init-byte b)
      (Fun Nat8 [Nat8 Nat])
    (if (= (cast 0) (bit-and b (cast 0b10000000)))
        [b (cast 0)]
      (if (= (cast 0) (bit-and b (cast 0b00100000)))
          [(bit-and b (cast 0b00011111))
           (cast 1)]
        (if (= (cast 0) (bit-and b (cast 0b00010000)))
            [(bit-and b (cast 0b00001111))
             (cast 2)]
          [(bit-and b (cast 0b00000111))
           (cast 3)]))))
  (define: cont-byte (Fun Nat8 Nat8) (bit-and (cast 0b00111111)))
  (define: (join a b) (Fun Nat32 Nat8 Nat32)
    (+ (shift-l (cast a) (cast 6))
       (cast (cont-byte b))))
  (define (go bs)
    (Iter
     (fun (_)
       (maybe/map (fun ([b0 bs'])
                    (let1 [b0' n] (init-byte b0)
                      [(foldl join (cast b0') (take (cast n) bs'))
                       (go (skip (cast n) bs'))]))
                  (next bs)))))
  (go (string/bytes s)))

;;? Iterate over the indivitual bytes of a string.
;;?
;;? Note that a single byte only represents a whole "character" if your UTF-8
;;? string is actually only of the ASCII subset.
(define (string/bytes (Str xs))
  (array/iter xs))

;;? Show an integer in hexadecimal notation
(define: (show-hex n) (Fun Int Str)
  (define: (it n) (Fun Int (Iter Nat8))
    (if (= n 0)
        iter/nil
      (let ((x (bit-and 0b1111 n))
            (c (if (< x 0xa)
                   (+ 48 x)
                 (+ 97 (- x 0xa)))))
        (iter/cons (cast c) (it (shift-r n 4))))))
  (if (= n 0)
      "0x0"
    (str-append "0x" (Str (array/collect (reverse (it n)))))))

(define: (lines (Str s))
    (Fun Str (Iter Str))
  (define (lines' s)
    (Iter (fun (Unit) (maybe/map (fun (i) (map-two Str
                                                   (<o lines' (array/skip 1))
                                                   (unwrap! (array/split i s))))
                                 (array/find = (cast ascii-newline) s)))))
  (lines' s))

(define: ascii-newline Nat8 (cast 0xA))
(define: ascii-minus Nat8 (cast 0x2D))
(define: ascii-0 Nat8 (cast 0x30))
(define: ascii-9 Nat8 (cast 0x39))

(define (parse-int s)
  (maybe/bindr (next (string/bytes s))
               (fun ([c cs])
                 (let1 [sign cs] (if (= c ascii-minus)
                                     [-1 cs]
                                   [1 (iter/cons c cs)])
                   (maybe/map (<o (* sign) cast) (parse-nat' cs))))))

(define (parse-nat s) (parse-nat' (string/bytes s)))
(define (parse-nat' cs)
  (define: (parse-nat'' [c cs])
      (Fun [Nat8 (Iter Nat8)] (Maybe Nat))
    (maybe/bindr (parse-digit c)
                 (fun (x)
                   (match (next cs)
                     (case None (Some (cast x)))
                     (case (Some nx)
                           (maybe/map (fun (y) (+ (cast x) (* (cast 10) y)))
                                      (parse-nat'' nx)))))))
  (maybe/bindl parse-nat'' (next (reverse cs))))

(define (digit? c) (and (>= c ascii-0) (<= c ascii-9)))

(define (parse-digit c) (if (digit? c) (Some (- c ascii-0)) None))