~bendersteed/wikisophy

7252f83a28aaf13900534ead385191ee0d972a89 — Dimakakos Dimos 4 years ago 9bfa8e3
Fix: handling of pages without links
1 file changed, 6 insertions(+), 5 deletions(-)

M crawler.lisp
M crawler.lisp => crawler.lisp +6 -5
@@ -50,7 +50,7 @@
     2. Ignores external links, links to the current page, or red
        links (links to non-existent pages)"
   (let ((content (parse-html url)))
-    (loop named outer for p across (lquery:$ content "#mw-content-text p")
+    (loop named outer for p across (lquery:$ content "#mw-content-text p") ;TODO: also handle ul links
           do (loop named inner for a across (lquery:$ p "a")
                    do (if (validate-link a p)
                           (return-from outer


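The outer LOOP above walks the paragraphs of the article body and RETURN-FROMs as soon as an anchor passes VALIDATE-LINK (the returned value is cut off by the hunk); when no paragraph contains a valid link, the loop falls through and yields NIL. The new ;TODO remark records that links appearing only inside <ul> lists are still skipped. One hypothetical way to cover them, not part of this commit, would be to widen the CSS selector handed to lquery (assuming the selector engine accepts comma-grouped selectors):

    ;; Hypothetical sketch only: scan list items as well as paragraphs.
    ;; CONTENT and VALIDATE-LINK are the names used in the function above;
    ;; returning the anchor A is purely for illustration.
    (loop named outer
          for p across (lquery:$ content "#mw-content-text p, #mw-content-text ul li")
          do (loop named inner for a across (lquery:$ p "a")
                   do (if (validate-link a p)
                          (return-from outer a))))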
@@ -59,7 +59,8 @@
 (defun title-from-url (url)
   "Given a wikipedia url return the articles title."
   (let ((mark (search "/wiki/" url)))
-    (subseq url (+ 6 mark)))) ; 6 denotes the length of "/wiki/"
+    (when mark
+      (subseq url (+ 6 mark))))) ; 6 denotes the length of "/wiki/"

 (defun path-to-philosophy (input)
   "Given a string as an input return a list that shows the path from


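The WHEN guard added above matters because SEARCH returns NIL when the URL contains no "/wiki/" segment; the old body then evaluated (+ 6 NIL), which signals a type error. With the guard in place TITLE-FROM-URL simply returns NIL for such URLs. Roughly (the URLs below are only illustrative):

    (title-from-url "https://en.wikipedia.org/wiki/Philosophy") ;=> "Philosophy"
    (title-from-url "https://en.wikipedia.org/")                ;=> NIL (previously a type error from (+ 6 NIL))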
@@ -71,9 +72,9 @@
         (loop while (string-not-equal url end)
               when (member (title-from-url url) titles-list :test #'string-equal)
                 do (return (append titles-list '("Cycle detected!")))
-              when (not url)
-                do (return (append titles-list '("No links page detected!")))
+              when (string-equal url *wikipedia-url*)
+                do (return (append titles-list '("No links detected in page!")))
               collect (title-from-url url) into titles-list
-              do (setq url (concatenate 'string *wikipedia-url* (grab-link url)))
+              do (setq url (concatenate 'string *wikipedia-url* (grab-link url)))
               finally (return (append titles-list '("Philosophy"))))
         '("There is no article to start crawling from."))))
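This last hunk is the heart of the fix. Assuming GRAB-LINK is the paragraph-scanning function from the first hunk, it returns NIL when a page has no valid links; NIL concatenates as the empty sequence, so the next URL collapses to *WIKIPEDIA-URL* itself and never becomes NIL. The removed (not url) test therefore could not fire, whereas comparing URL against *WIKIPEDIA-URL* does catch the case and ends the crawl with "No links detected in page!". A sketch of the underlying behaviour, using an assumed value for *WIKIPEDIA-URL*:

    ;; NIL is the empty sequence, so the "next" URL is just the bare prefix:
    (concatenate 'string "https://en.wikipedia.org" nil) ;=> "https://en.wikipedia.org"
    ;; a string is always true, so the removed check never triggered:
    (not "https://en.wikipedia.org")                     ;=> NIL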