~williewillus/racket-rfc8949

4dbf1b6a3411fb1d8cd045d25fab391a4075cc71 — Vincent Lee 3 years ago 9e0f0ff
Add a registry for tag deserialization
3 files changed, 93 insertions(+), 69 deletions(-)

M common.rkt
M decode.rkt
M encode.rkt
M common.rkt => common.rkt +25 -4
@@ 2,10 2,6 @@

(require racket/contract)

; Parameter that holds the object used to represent null
(provide cbor-null)
(define cbor-null (make-parameter 'null #f 'cbor-null))

(provide unassigned-simple-value?)
(define unassigned-simple-value?
  (or/c (integer-in 0 19) (integer-in 32 255)))


@@ 31,3 27,28 @@
  (tag
   value)
  #:transparent)


(provide cbor-config?
         cbor-config-tag-deserializers
         cbor-config-null-value
         empty-config
         with-tag-deserializer
         with-cbor-null)

(struct cbor-config
  (tag-deserializers
   null-value))

(define empty-config (cbor-config #hasheqv() 'null))

(define (with-tag-deserializer config id deser)
  (define old-handlers (cbor-config-tag-deserializers config))
  (struct-copy
   cbor-config config
   [tag-deserializers (hash-set old-handlers id deser)]))

(define (with-cbor-null config v)
  (struct-copy
   cbor-config config
   [null-value v]))

M decode.rkt => decode.rkt +65 -63
@@ 46,18 46,13 @@
                [(2 3 4 5) 'indefinite]
                [(7) (error "Unexpected break code")])]))) ; We handle break explicitly in deserialization of lists of things, so it should never appear here.

(define (cbor-read-uint port additional)
(define (cbor-read-uint _config port additional)
  (read-argument port 0 additional))

(define (cbor-read-neg port additional)
(define (cbor-read-neg _config port additional)
  (- -1 (read-argument port 1 additional)))

(define (dbg port [msg #f])
  (when msg
    (display msg))
  (writeln (peek-bytes 9999 0 port)))

; repeatedly calls `read-inner` on the port until a break code is reached
; repeatedly calls `read-inner` until a break code is reached
; after a break code is reached, the list of all read values (in reverse read order) is passed to `combiner`
(define (cbor-read-indefinite port read-inner combiner)
  (let loop ([acc '()])


@@ 68,98 63,73 @@
            (unless (port-commit-peeked 1 prog always-evt port)
              (error "Failed to commit peeking of break code, is someone else using this port concurrently?"))
            (combiner acc))
          (loop (cons (read-inner port) acc))))))
          (loop (cons (read-inner) acc))))))

(define (cbor-read-byte-string port additional)
(define (cbor-read-byte-string config port additional)
  (define payload-len (read-argument port 2 additional))
  (if (eq? payload-len 'indefinite)
      (cbor-read-indefinite port
                            cbor-read ; TODO disallow indefinites in the chunks, and also check that they are actually bytestrings
                            (lambda () (cbor-read config port)) ; TODO disallow indefinites in the chunks, and also check that they are actually bytestrings
                            (lambda (strs)
                              (bytes->immutable-bytes (apply bytes-append (reverse strs)))))
      (let ([buf (make-bytes payload-len)])
        (bytes->immutable-bytes (read-n-bytes buf port)))))

(define (cbor-read-string port additional)
(define (cbor-read-string config port additional)
  (define payload-len (read-argument port 3 additional))
  (if (eq? payload-len 'indefinite)
      (cbor-read-indefinite port
                            cbor-read ; TODO disallow indefinites in the chunks, and also check that they are actually strings
                            (lambda () (cbor-read config port)) ; TODO disallow indefinites in the chunks, and also check that they are actually strings
                            (lambda (strs)
                              (apply string-append-immutable (reverse strs))))
      (let ([buf (make-bytes payload-len)])
        (read-n-bytes buf port)
        (string->immutable-string (bytes->string/utf-8 buf)))))

(define (cbor-read-list port additional)
(define (cbor-read-list config port additional)
  (define num-items (read-argument port 4 additional))
  (if (eq? num-items 'indefinite)
      (cbor-read-indefinite port cbor-read reverse)
      (cbor-read-indefinite port
                            (lambda () (cbor-read config port))
                            reverse)
      (let loop ([acc '()]
                 [n num-items])
        (if (zero? n)
            (reverse acc)
            (loop (cons (cbor-read port) acc)
            (loop (cons (cbor-read config port) acc)
                  (sub1 n))))))

(define (cbor-read-map port additional)
(define (cbor-read-map config port additional)
  (define num-pairs (read-argument port 5 additional))
  (if (eq? num-pairs 'indefinite)
      (cbor-read-indefinite port
                            (lambda (port)
                              (cons (cbor-read port)
                                    (cbor-read port)))
                            (lambda ()
                              (cons (cbor-read config port)
                                    (cbor-read config port)))
                            make-immutable-hash)
      (let loop ([acc #hash()]
                 [n num-pairs])
        (if (zero? n)
            acc
            (let* ([k (cbor-read port)]
                   [v (cbor-read port)])
            (let* ([k (cbor-read config port)]
                   [v (cbor-read config port)])
              (loop (hash-set acc k v)
                    (sub1 n)))))))

(define (interpret-tag tag-value data)
  ; todo implement all the unimplemented stuff from the RFC itself, and add a registry for
  ; other libraries to add their own extensions
  (case tag-value
    [(2) ; bignum
     (unless (bytes? data)
       (error "Bignum payload must be bytes"))
     (let ([len (bytes-length data)])
       (let loop ([idx (sub1 len)]
                  [acc 0])
         (if (= -1 idx)
             acc
             (let ([chunk (bytes-ref data 'todo)]
                   [shift 'todo])
               (loop (sub1 idx)
                     (bitwise-ior acc (arithmetic-shift chunk shift)))))))]
    [(30) ; rational
     (unless (and (list? data)
                  (= 2 (length data))
                  (exact-integer? (car data))
                  (exact-integer? (cadr data))
                  (positive? (cadr data)))
       (error "Rational (tag 30) must have a payload of [integer numerator, positive denominator]"))
     (/ (car data) (cadr data))]
    [(#xffff #xffffffff #xffffffffffffffff)
     (error "Registered invalid tag")]
    [else #f]))

(define (cbor-read-tag port additional)
(define (cbor-read-tag config port additional)
  (let* ([tag (read-argument port 6 additional)]
         [data (cbor-read port)]
         [interpreted (interpret-tag tag data)])
    (or interpreted (tagged-value tag data))))
         [data (cbor-read config port)]
         [handler (hash-ref (cbor-config-tag-deserializers config)
                            tag (lambda () tagged-value))])
    (handler tag data)))

(define (parse-simple-value n)
(define (parse-simple-value config n)
  (if (unassigned-simple-value? n)
      (simple-value n)
      (case n
        [(20) #f]
        [(21) #t]
        [(22) (cbor-null)]
        [(22) (cbor-config-null-value config)]
        [(23) undefined]
        [(24 25 26 27 28 29 30 31)
         (error "Simple value ~a is reserved" n)]


@@ 183,15 153,15 @@
        val
        (- val))))

(define (cbor-read-special port additional)
(define (cbor-read-special config port additional)
  (if (<= additional 23)
      (parse-simple-value additional)
      (parse-simple-value config additional)
      (case additional
        [(24)
         (let ([extra (read-byte port)])
           (if (< extra 32)
               (error "Simple value used extra byte to encode ~a when it should have been in the initial byte" extra)
               (parse-simple-value extra)))]
               (parse-simple-value config extra)))]
        [(25)
         (decode-half (bitwise-ior
                       (arithmetic-shift (read-byte port) 8)


@@ 207,17 177,49 @@
        [(28 29 30) (error "Additional information ~a is reserved in major type 7" additional)]
        [(31) (error "Unexpected break code")]))) ; We handle break explicitly in deserialization of lists of things, so it should never appear here.


(define (deserialize-bignum _tag data)
  (unless (bytes? data)
    (error "Bignum payload must be bytes"))
  (integer-bytes->integer data #f #t))

(define (deserialize-bigneg tag data)
  (- -1 (deserialize-bignum tag data)))

(define (deserialize-rational _tag data)
  (unless (and (list? data)
               (= 2 (length data))
               (exact-integer? (car data))
               (exact-integer? (cadr data))
               (positive? (cadr data)))
    (error "Rational (tag 30) must have a payload of [integer numerator, positive denominator]"))
  (/ (car data) (cadr data)))

(define (deserialize-invalid-tag tag _data)
  (error (format "Tag ~a is registered as invalid at the IANA" tag)))

(define default-config
  (foldl (lambda (p config)
           (with-tag-deserializer config (car p) (cdr p)))
         empty-config
         (list (cons 2 deserialize-bignum)
               (cons 3 deserialize-bigneg)
               (cons 30 deserialize-rational)
               (cons #xffff deserialize-invalid-tag)
               (cons #xffffffff deserialize-invalid-tag)
               (cons #xffffffffffffffff deserialize-invalid-tag))))
  
(define major-dispatch-table
  (vector-immutable
   cbor-read-uint cbor-read-neg cbor-read-byte-string cbor-read-string
   cbor-read-list cbor-read-map cbor-read-tag cbor-read-special))

; Deserialize one cbor value from the given input port. Can and will be called reentrantly.
(define (cbor-read in)
(define (cbor-read config in)
  (define header (read-byte in))
  (let ([major (bitwise-bit-field header 5 8)]
        [additional (bitwise-and header #b11111)])
    ((vector-ref major-dispatch-table major) in additional)))
    ((vector-ref major-dispatch-table major) config in additional)))

(module* test #f
  (require rackunit


@@ 233,7 235,7 @@
              (range (/ (string-length s) 2))))))

  (define (try str)
    (cbor-read (open-input-bytes (hex-bytes str))))
    (cbor-read default-config (open-input-bytes (hex-bytes str))))

  (test-case
      "Deserialization - RFC 8949 Int and Bignum Examples"


@@ 326,7 328,7 @@
      "Deserialization - RFC 8949 Simple Value Examples"
    (check-equal? (try "f4") #f)
    (check-equal? (try "f5") #t)
    (check-equal? (try "f6") (cbor-null))
    (check-equal? (try "f6") 'null)
    (check-equal? (try "f7") undefined)
    (check-equal? (try "f0") (simple-value 16))
    (check-equal? (try "f8ff") (simple-value 255)))

M encode.rkt => encode.rkt +3 -2
@@ 101,7 101,7 @@
        (write-byte (combine 7 24) out)
        (write-byte inner out))))

(define (cbor-write v out)
(define (cbor-write config v out)
  (cond
    [(and (exact-integer? v) (<= 0 v u64-max))
     (cbor-write-uint v out)]


@@ 109,7 109,7 @@
     (cbor-write-neg v out)]
    [(eq? v #f) (write-byte (combine 7 20) out)]
    [(eq? v #t) (write-byte (combine 7 21) out)]
    [(equal? v (cbor-null)) (write-byte (combine 7 22) out)]
    [(equal? v (cbor-config-null-value config)) (write-byte (combine 7 22) out)]
    [(eq? v undefined) (write-byte (combine 7 23) out)]
    [(single-flonum? v) (cbor-write-float v out)]
    [(flonum? v) (cbor-write-double v out)]


@@ 120,3 120,4 @@
    [(tagged-value?) (cbor-write-tag v out)] ; todo handle interpreted values
    [(simple-value? v) (cbor-write-simple-value v out)]
    [else (error "Don't know how to write" v)]))
; todo support writing vectors as lists, sequences as indefinite lists