~brettgilio/org-webring

6835ff2e94d7027596e8b1134506079e8fd32579 — Jamie Beardslee 24 days ago cabcb8d
Add support for atom feeds.

* New function: --type determines whether FEED is an rss or atom feed.
* New function: --source-link gets the source link of FEED.  This was
  split from --get-items-from-url due to the messiness of getting the
  link from an atom feed.
* --feed-items and --feed-parse now take a TYPE argument.  This is
  automatically determined in --get-items-from-url via --type.
* --feed-parse no longer fetches the feed.  This has been moved to
  --get-items-from-url in order to retain the type.
* --article-instance checks for one of "description", "content", or
  "summary" tags.
1 files changed, 50 insertions(+), 22 deletions(-)

M org-webring.el
M org-webring.el => org-webring.el +50 -22
@@ 85,7 85,7 @@
  :type 'integer)

(defcustom org-webring-items-per-source 1
  "How many entries should be extracted from each RSS feed."
  "How many entries should be extracted from each feed."
  :group 'org-webring
  :type 'integer)



@@ 95,7 95,7 @@
  :type 'string)

(defcustom org-webring-urls '()
  "The URLs which will be scraped for RSS feeds."
  "The URLs which will be scraped for feeds."
  :group 'org-webring
  :type '(repeat string))



@@ 161,33 161,58 @@ XML node."
  (org-webring--xml-node-text
   (org-webring--xml-get-child feed prop)))

(defun org-webring--feed-items (feed)
  "Extract only the items of a parsed RSS FEED."
  (xml-get-children feed 'item))

(defun org-webring--feed-parse (link)
  "Parse a RSS feed and return the 'channel' tag."
  (assq 'channel
	(assq 'rss
	      (with-temp-buffer
		(url-insert-file-contents link)
		(xml-parse-region)))))
(defun org-webring--type (feed)
  "Classify the already-parsed FEED as RSS or Atom.
FEED is a list of parsed XML nodes.  The result is the symbol
`rss' when FEED has a top-level `rss' node, and the symbol `atom'
in every other case."
  ;; Node tags are symbols, so an `eq'-based lookup is sufficient.
  (let ((rss-node (assq 'rss feed)))
    (cond (rss-node 'rss)
          (t 'atom))))

(defun org-webring--feed-items (feed type)
  "Collect the article nodes that are direct children of FEED.
FEED is a parsed XML node.  When TYPE is the symbol `rss' the
children tagged `item' are returned; for any other TYPE the
children tagged `entry' are returned."
  (let ((article-tag (if (eq type 'rss) 'item 'entry)))
    (xml-get-children feed article-tag)))

(defun org-webring--feed-parse (data type)
  "Pick the content node out of the parsed XML DATA.
When TYPE is the symbol `rss', return the `channel' node found
inside the top-level `rss' node.  For any other TYPE, return the
top-level `feed' node.  Return nil when the node is not present."
  (cond
   ((eq type 'rss)
    (let ((rss-node (assq 'rss data)))
      (assq 'channel rss-node)))
   (t
    (assq 'feed data))))

(defun org-webring--source-link (feed type)
  "Return the source link of FEED, or nil if none is found.
FEED is a parsed content node and TYPE is the symbol `rss' or any
other value, which is treated as Atom.

For RSS the link is the text of the `link' child.  For Atom the
link is the `href' attribute of a `link' child.  A link whose
`rel' attribute is absent or \"alternate\" is preferred over
others such as rel=\"self\" (the feed document itself); if there
is no such link, the first `link' carrying an `href' is used."
  (if (eq type 'rss)
      (org-webring--feed-text-prop feed 'link)
    (let* ((links (xml-get-children feed 'link))
           (alternate
            (seq-find (lambda (link)
                        (and (xml-get-attribute-or-nil link 'href)
                             (member (xml-get-attribute-or-nil link 'rel)
                                     '(nil "alternate"))))
                      links))
           (fallback
            (seq-find (lambda (link)
                        (xml-get-attribute-or-nil link 'href))
                      links)))
      ;; Look the attribute up by name: attribute order is not fixed,
      ;; so the first attribute of a `link' is not necessarily `href'.
      (xml-get-attribute-or-nil (or alternate fallback) 'href))))

(defun org-webring--get-items-from-url (url)
  "Create a list of items contained in the RSS feed at URL.
  "Create a list of items contained in the feed at URL.
Produces the value set by `org-webring-items-per-source'."
  (let* ((feed (org-webring--feed-parse url))
         (source-link (org-webring--feed-text-prop feed 'link))
         (source-title (org-webring--feed-text-prop feed 'title)))
  (let* ((url-content
	  (with-temp-buffer
	    (url-insert-file-contents url)
	    (xml-parse-region)))
	 (type (org-webring--type url-content))
	 (feed (org-webring--feed-parse url-content type))
	 (source-link (org-webring--source-link feed type))
	 (source-title (org-webring--feed-text-prop feed 'title)))
    (seq-map (lambda (item)
               `(item                   ; tag
	       `(item                   ; tag
                 nil                    ; class
                 ;; children
                 (sourceLink nil ,source-link)
                 (sourceTitle nil ,source-title)
                 ,@(xml-node-children item)))
             (seq-take (org-webring--feed-items feed)
                       org-webring-items-per-source))))
	     (seq-take (org-webring--feed-items feed type)
		       org-webring-items-per-source))))

(defun org-webring--string-truncate (len s elipsis)
  "If S is longer than LEN, cut it down and add ELIPSIS at the


@@ 203,13 228,16 @@ end. Taken from the s.el library."
(defun org-webring--pub-time (item)
  "Get ITEM's publication time."
  (org-webring--date->time
   (org-webring--feed-text-prop item 'pubDate)))
   (or (org-webring--feed-text-prop item 'pubDate)
       (org-webring--feed-text-prop item 'updated))))

(defun org-webring--article-instance (item)
  "Generate the structure of a feed article from a given ITEM."
  (let ((desc-sanitized
	 (with-temp-buffer
	   (insert (org-webring--feed-text-prop item 'description))
	   (insert (or (org-webring--feed-text-prop item 'description)
		       (org-webring--feed-text-prop item 'content)
		       (org-webring--feed-text-prop item 'summary)))
	   (apply #'concat (dom-strings
			    (libxml-parse-html-region (point-min)
						      (point-max)))))))