~nytpu/lasso

7a833d5e6850f68f94fdb4189011c388800c4e6a — nytpu a month ago 75e569b
gemini: use cl-smallnet library instead of custom-written Gemini client
7 files changed, 14 insertions(+), 393 deletions(-)

M gemini.lisp
M lasso.asd
M lasso.lisp
M package.lisp
D test/gemini.lisp
M test/package.lisp
M types.lisp
M gemini.lisp => gemini.lisp +12 -247
@@ 3,172 3,6 @@
;;;; For more license details, see LICENSE or <https://www.gnu.org/licenses/agpl-3.0.html>.
(cl:in-package :gemini)

(define-constant +status-codes+
  '((10 . :input) (11 . :input-sensitive) (20 . :success)
    (30 . :redirect-temporary) (31 . :redirect-permanent)
    (40 . :temporary-failure) (41 . :server-unavailable)
    (42 . :cgi-error) (43 . :proxy-error) (44 . :slow-down)
    (50 . :permanent-failure) (51 . :not-found) (52 . :gone)
    (53 . :proxy-request-refused) (59 . :bad-request)
    (60 . :client-cert-required) (61 . :cert-not-authorized)
    (62 . :cert-invalid))

  :test #'equal
  :documentation "Gemini response codes according to the specification")

(defun statusp (status)
  "Whether or not a given Gemini response status maps to a valid code"
  (if (rassoc status +status-codes+) t nil))

(defun codep (code)
  "Whether or not a given Gemini response code is a valid response code."
  (if (assoc code +status-codes+) t nil))

(deftype code ()
  "A valid Gemini response code"
  `(satisfies codep))

(defun-contract code->status (code)
  ((:post (or (statusp %)
              (eq % :invalid-response-status)))
   (:documentation "Query a given numeric response code and return the associated symbol, or
  return `invalid-response-status' if the numeric code is invalid."))
  (let ((status (assoc code +status-codes+)))
    (if status
        (cdr status)
        :invalid-response-status)))

(defun-contract status->code (status)
  ((:post (or (codep %)
              (eq % :invalid-response-status)))
   (:documentation "Query a given response symbol and return the associated numeric code, or return `invalid-response-status' if the symbol is invalid."))
  (let ((code (rassoc status +status-codes+)))
    (if code
        (car code)
        :invalid-response-status)))

(define-condition malformed-response (error)
  ((reason :initarg :reason :reader malformed-response-reason)
   (response :initarg :response :reader malformed-response-response))
  (:documentation "Raised when a malformed response is received.")
  (:report (lambda (condition stream)
             (format stream "~a: ~s"
                     (malformed-response-reason condition)
                     (malformed-response-response condition)))))

(define-constant +crlf+ (format nil "~c~c" #\return #\newline)
  :test #'string=
  :documentation "A string containing CRLF")

(defvar *ctx* nil
  "The TLS context used for requests.")

(defun-contract context-init ()
  ((:post *ctx*
          (eq % *ctx*))
   (:documentation "Initialize the TLS context if it has not already been initialized."))
  (when (not *ctx*)
    (setf *ctx*
          (ssl:make-context :disabled-protocols
                              (list ssl:+ssl-op-no-sslv2+ ssl:+ssl-op-no-sslv3+
                                    ssl:+ssl-op-no-tlsv1+ ssl:+ssl-op-no-tlsv1-1+)
                            :verify-mode ssl:+SSL-VERIFY-NONE+
                            :options (list ssl::+ssl-op-all+ ssl::+ssl-op-ignore-unexpected-eof+))))
  *ctx*)

(defun-contract context-free ()
  ((:post (not *ctx*))
   (:documentation "Close and free the TLS context in *CTX*."))
  (when *ctx*
    (ssl:ssl-ctx-free *ctx*)
    (setf *ctx* nil))
  (values))

(defun-contract read-line-crlf (stream &optional eof-error-p)
  ((:pre (typecheck stream stream)
         (open-stream-p stream)
         (input-stream-p stream)
         (equal (stream-element-type stream)
                '(unsigned-byte 8)))
   (:post (typecheck % string))
   (:documentation "Read a CRLF-terminated line from a binary stream and return a string, including the CRLF at the end.  If EOF-ERROR-P is true, then raise an END-OF-FILE condition if EOF is reached prior to reading a full line; otherwise return whatever was read prior to EOF (will not have a CRLF at the end)."))
  (with-output-to-string (s)
    ; Store the previous character as look-behind because there's no such thing
    ; as PEEK-BYTE for some dumb reason.
    (for ((prev as nil)
          (cur = (restart-case
                     (read-byte stream eof-error-p nil)
                   ; Offer the behavior of EOF-ERROR-P being NIL even if it was
                   ; set to true
                   (return-string ()
                     :report "Return the line that has been read prior to EOF."
                     nil))))
      (when cur
        ; we know that if CUR isn't NIL then it hold a character from now on
        ; even though CODE-CHAR should presumably be fast, I don't like the
        ; idea of calling it 3–4 times each loop instead of setting it once here
        (setf cur (code-char cur))
        (write-char cur s))
      ; put after writing to the string to ensure we don't lose the LF before
      ; exiting the loop.
      (until
        (or (not cur)
            (and (and prev (char= prev #\return))
                 (and cur (char= cur #\linefeed)))))
      (setf prev cur))))

(defun-contract parse-response (resp)
  ((:pre (typecheck resp string))
   (:post (= (length %) 2)
          (typecheck (car %) code)
          (typecheck (cadr %) string))
   (:documentation "Parse a Gemini response as detailed in the specification."))
  (unless (uiop:string-suffix-p resp +crlf+)
    (error 'malformed-response :reason "Response not CRLF-terminated"
           :response resp))
  ; easier to prune off the ending now
  (setf resp (string-right-trim +crlf+ resp))
  (unless (and (>= (length resp) 3)
               (digit-char-p (elt resp 0))
               (digit-char-p (elt resp 1))
               (char= #\space (elt resp 2))) ; responses must have a space here!
    (error 'malformed-response :reason "Response status malformed or not present"
           :response resp))
  (let ((status (parse-integer (subseq resp 0 2)))
        ; safe because we know it's at least length 3, so an empty meta will
        ; result in an empty string returned by SUBSEQ
        (meta (subseq resp 3)))
    (unless (codep status)
      (error 'malformed-response :reason "Invalid status code"
             :resp resp))
    (when (> (length meta) 1024)
      (error 'malformed-response :reason "Response longer than 1024 bytes"
             :response resp))
    (values status meta)))

(defun-contract read-all (stream)
  ((:pre (streamp stream)
         (open-stream-p stream)
         (input-stream-p stream))
   (:post (typep % '(vector (unsigned-byte 8) *)))
   (:documentation "Read all elements present on the given STREAM and return a vector of them."))
  ;; Have to use READ-BYTE instead of the more efficient READ-SEQUENCE because READ-SEQUENCE is
  ;; actually useless since it doesn't do partial reads and will hang forever until the full size of
  ;; the buffer is available
  (loop with data of-type vector = (make-array 0 :element-type '(unsigned-byte 8) :adjustable t :fill-pointer 0)
        for b of-type (or (unsigned-byte 8) null) =
          (handler-case
              (read-byte stream nil)
            (ssl::ssl-error-ssl () nil))
        while b
        do (vector-push-extend b data)
        finally (return data)))

#|
(with-open-file (stream "/tmp/test.txt" :element-type '(unsigned-byte 8) :direction :input)
  (length (read-all stream)))
|#

(defun-contract get-title (body)
  ((:pre (typecheck body (or string null)))
   (:pre (typecheck % string))


@@ 185,35 19,6 @@
      (when first
        (string-left-trim "# " line)))))

(defun-contract retrieve-body (mime-type stream)
  ((:pre (typecheck mime-type mimeparse:media-range)
         (streamp stream)
         (open-stream-p stream)
         (input-stream-p stream))
   (:post (typecheck % (or string (vector (unsigned-byte 8) *))))
   (:documentation "Read the body of a response from the given stream, and decode it to a string if it's a textual body *in a recognized encoding*, or return octets otherwise."))
  (let* ((charset
           (handler-case
               (-<> mime-type
                 mimeparse::media-params ; export your accessors people!
                 (assoc "charset" <> :test #'string=)
                 cadr
                 ; spec says to default to UTF-8 if no charset is explicitly given
                 (or "utf-8")
                 string-upcase
                 (intern :keyword)
                 babel-encodings:get-character-encoding)
             ; if the charset isn't recognized by Babel return NIL
             (simple-error () nil))))
    (if (and (string= "text" (mimeparse::media-type mime-type))
             charset)
        (-> stream
          (read-all)
          (babel:octets-to-string :encoding charset))
        (error 'malformed-response :reason "Non-textual body or body in unrecognized charset" :resp nil)
        ; return the binary body if it's non-textual or in an unknown charset
        #|(read-all stream)|#)))

(defun-contract gemini-request (uri &key host port)
  ((:pre (typecheck uri (or quri:uri string))
         (typecheck host (or string null))


@@ 221,55 26,15 @@
   (:post (typecheck (car %) types:article-info)
          (typecheck (cadr %) (or string null)))
   (:documentation "Initiate a TLS connection to the host and port indicated by either URI or HOST & PORT, with a request header of URI, and return an TYPES:ARTICLE-INFO."))
  (when (not *ctx*)
    (context-init))
  (let* ((parsed-uri (if (typep uri 'string)
                         (quri:uri uri)
                         uri))
         (host (or host (quri:uri-host parsed-uri)))
         (port (or port (quri:uri-port parsed-uri) 1965))
         (request (quri:render-uri parsed-uri)))
    (log:info "making gemini request to " request)

    (unless (or (not (quri:uri-scheme parsed-uri))
                (string= "gemini" (quri:uri-scheme parsed-uri)))
      (error "Only Gemini requests are supported: ~s" request))
    (unless (and host port)
      (error "Host and port must be present: ~s" request))

    (when (not (quri:uri-scheme parsed-uri))
      (setf request (uiop:strcat "gemini:" request)))

    (usocket:with-client-socket (socket stream host port)
      (ssl:with-global-context (*ctx*)
        (let ((ssl-stream (ssl:make-ssl-client-stream stream
                                                      :verify nil :unwrap-stream-p t :hostname host)))
          ; Write the request and ensure it's transferred
          (write-sequence
            (babel:string-to-octets (format nil "~a~a" request +crlf+)
                                    :encoding :utf-8 :use-bom NIL)
            ssl-stream)
          (force-output ssl-stream)

          ; Retrieve the response header and parse it
          (multiple-value-bind (code meta)
              (parse-response (read-line-crlf ssl-stream))
            (let* ((status (code->status code))
                   ; Always textual because RETRIEVE-BODY raises an error when
                   ; a binary body is found
                   (body (when (eq status :success)
                           (retrieve-body (mimeparse:parse-media-range meta)
                                          ssl-stream))))

              (when ssl-stream
                (handler-case
                    (close ssl-stream)
                  (t () nil)))
              (when stream
                (close stream))
              (values
                (make-instance 'types:article-info
                               :id 0 :user 0 :title (get-title body)
                               :uri request :retrieved (get-universal-time)
                               :code code :meta meta)
                body))))))))
  (log:info "making gemini request to " uri)
  (multiple-value-bind (body status meta)
      (smallnet.gemini:gemini-request uri :host host :port port)
    (when (and (eq :success status)
               (not (string= "text" (mimeparse::media-type meta))))
      (error "Non-textual response type: ~s" uri))
    (values
      (make-instance 'types:article-info
                     :id 0 :user 0 :title (get-title body)
                     :uri request :retrieved (get-universal-time)
                     :code code :meta meta)
      body)))

M lasso.asd => lasso.asd +1 -2
@@ 22,7 22,7 @@
     #:for
     #:arrows
     #:quri

     #:smallnet
     #:sqlite
     #:zstd
     #:usocket


@@ 67,7 67,6 @@
     (:file "test/lasso" :depends-on ("test/package"))
     (:file "test/dbc" :depends-on ("test/package" "test/lasso"))
     (:file "test/db" :depends-on ("test/package" "test/lasso"))
     (:file "test/gemini" :depends-on ("test/package" "test/lasso"))
     (:file "test/config" :depends-on ("test/package" "test/lasso")))

  :perform

M lasso.lisp => lasso.lisp +0 -1
@@ 254,5 254,4 @@ See the *ROUTES* for documentation of the route description returned."))
              (bad-path))))))

  ; Error handler falls through to cleanup code
  (gemini:context-free)
  (db:database-close))

M package.lisp => package.lisp +0 -25
@@ 107,31 107,6 @@
(defpackage :gemini
  (:documentation "Gemini client")
  (:export
    #:code->status
    #:status->code
    #:statusp
    #:codep
    #:code

    #:gemini-response
    #:request
    #:gemini-response-request
    #:gemini-response-code
    #:meta
    #:gemini-response-meta
    #:mime
    #:gemini-response-mime
    #:body
    #:gemini-response-body

    #:malformed-reponse
    #:reason
    #:malformed-reponse-reason
    #:response
    #:malformed-reponse-response

    #:context-init
    #:context-free
    #:gemini-request)
  (:use #:cl
        #:dbc

D test/gemini.lisp => test/gemini.lisp +0 -111
@@ 1,111 0,0 @@
;;;; Copyright (c) 2022 nytpu <alex [at] nytpu.com>
;;;; SPDX-License-Identifier: AGPL-3.0-only
;;;; For more license details, see LICENSE or <https://www.gnu.org/licenses/agpl-3.0.html>.
(cl:in-package :gemini/test)

(def-suite* gemini :in lasso/test:main-system)

(test statuses
  (let ((real-statuses '(:input :input-sensitive :success :redirect-temporary
                         :redirect-permanent :temporary-failure
                         :server-unavailable :cgi-error :proxy-error :slow-down
                         :permanent-failure :not-found :gone
                         :proxy-request-refused :bad-request
                         :client-cert-required :cert-not-authorized
                         :cert-invalid))
        (invalid-statuses '(:binput :test :hello-world :|input| :succ-ess)))
    (dolist (status real-statuses)
      (is-true (gemini:statusp status))
      (is (not (eq :invalid-response-status (gemini:status->code status))))
      (is (eq status
              (-> status
                gemini:status->code
                gemini:code->status))))
    (dolist (status invalid-statuses)
      (is-false (gemini:statusp status))
      (is (eq :invalid-response-status
              (gemini:status->code status))))))

(test codes
  (let* ((real-codes '(10 11 20 30 31 40 41 42 43 44 50 51 52 53 59 60 61 62))
         (invalid-codes (-> (for ((i from 0 :to 100)
                                  (l collecting i)))
                          (nset-difference real-codes :test #'=))))
    (dolist (code real-codes)
      (is-true (gemini:codep code))
      (is-true (typep code 'gemini:code))
      (is (not (eq :invalid-response-status (gemini:code->status code))))
      (is (eq code
              (-> code
                gemini:code->status
                gemini:status->code))))
    (dolist (code invalid-codes)
      (is-false (gemini:codep code))
      (is-false (typep code 'gemini:code))
      (is (eq :invalid-response-status
              (gemini:code->status code))))))

(defmacro with-temp-stream ((stream) &body body)
  `(uiop:call-with-temporary-file
     (lambda (,stream) ,@body)
     :want-stream-p t :want-pathname-p nil
     :element-type '(unsigned-byte 8)))

(defun write-binary (stream string)
  (write-sequence (map 'vector #'char-code string)
                  stream))

(test read-line-crlf
  (with-temp-stream (stream)
    (write-binary stream
                  (format nil "hello, world!~c~c" #\return #\linefeed))
    (file-position stream :start)
    (is (string= (format nil "hello, world!~a" gemini::+crlf+)
                 (gemini::read-line-crlf stream t))))

  (with-temp-stream (stream)
    (write-binary stream
                  (format nil "hello, world!~c" #\linefeed))
    (file-position stream :start)
    (signals end-of-file
      (gemini::read-line-crlf stream t))
    (file-position stream :start)
    (is (string= (format nil "hello, world!~c" #\linefeed)
                 (gemini::read-line-crlf stream nil)))))

(test parse-response
  (multiple-value-bind (code meta)
      (gemini::parse-response
        (format nil "20 text/gemini; charset=utf-8; lang=en-US~c~c"
                #\return #\linefeed))
    (is (= 20 code))
    (is (string= meta "text/gemini; charset=utf-8; lang=en-US")))

  (multiple-value-bind (code meta)
      (gemini::parse-response (format nil "51 ~a" gemini::+crlf+))
    (is (= 51 code))
    (is (string= "" meta)))

  (signals gemini::malformed-response
    (gemini::parse-response "20 text/gemini; charset=utf-8; lang=en-US"))
  (signals gemini::malformed-response
    (gemini::parse-response (format nil "51~a" gemini::+crlf+)))
  (signals gemini::malformed-response
    (gemini::parse-response (format nil "99 test~a" gemini::+crlf+))))

(test read-all
  (with-temp-stream (stream)
    (for ((i repeat 500))
      (write-binary stream (format nil "hello, world!!!!~a" gemini::+crlf+)))
    (is (equalp #() (gemini::read-all stream)))
    (file-position stream :start)
    (is (= (* 18 500)
           (-> stream
             gemini::read-all
             length)))))

(test retrieve-body
  (skip "TODO"))

(test gemini-request
  (skip "Don't want to rely on networking nor do I want to mock network connections"))

M test/package.lisp => test/package.lisp +0 -6
@@ 11,12 11,6 @@
  (:use #:cl
        #:fiveam))

(defpackage :gemini/test
  (:use #:cl
        #:fiveam
        #:arrows
        #:for))

(defpackage :config/test
  (:use #:cl
        #:fiveam))

M types.lisp => types.lisp +1 -1
@@ 61,7 61,7 @@
               :documentation "The datetime the article was saved, in universal time format in the UTC+0 time zone")
   (title :type (or string null) :initarg :title :accessor article-info-title
          :documentation "The extracted title of the document, or NIL if no title was extracted")
   (code :type (gemini:code) :initarg :code :accessor article-info-code
   (code :type (integer 0 62) :initarg :code :accessor article-info-code
         :documentation "The response code component of the response header.")
   (meta :type string :initarg :meta :accessor article-info-meta
         :documentation "The meta component of the response header.")