~bendersteed/wikisophy

9bfa8e3c1a82af958fe603e40e159e26b9c8b9b3 — Dimakakos Dimos 4 years ago 0a58763
Add: path-to-philosophy, crawler is mostly ready
2 files changed, 29 insertions(+), 3 deletions(-)

M crawler.lisp
M packages.lisp
M crawler.lisp => crawler.lisp +27 -2
@@ 1,4 1,7 @@
(defvar *wikipedia-api* "https://en.wikipedia.org/w/api.php")
(in-package wikisophy.crawler)

;; Endpoint of the Wikipedia API; QUERY-WIKIPEDIA passes it to
;; DRAKMA:HTTP-REQUEST together with the request parameters.
(defvar *wikipedia-api-url* "https://en.wikipedia.org/w/api.php")
;; Base url of the site. It is prepended to site-relative paths such as
;; "/wiki/Philosophy" and to the links returned by GRAB-LINK.
(defvar *wikipedia-url* "https://en.wikipedia.org")

(defun query-wikipedia (input)
  "Given a string input return the url of the most relevant wikipedia


@@ 11,7 14,7 @@
     (cadddr
      (cl-json:decode-json-from-string
       (map 'string #'code-char
	    (drakma:http-request *wikipedia-api* :parameters parameters)))))))
	    (drakma:http-request *wikipedia-api-url* :parameters parameters)))))))

(defun parse-html (url)
  "Given a wikipedia url parse the html into a PLUMP-DOM element"


@@ 52,3 55,25 @@
		   do (if (validate-link a p)
			  (return-from outer
			    (aref (lquery:$ a (attr :href)) 0)))))))

(defun title-from-url (url)
  "Given a wikipedia url return the article's title, i.e. everything
  that follows the first \"/wiki/\" in URL. Return NIL when URL does
  not contain \"/wiki/\"."
  (let* ((marker "/wiki/")
         (mark (search marker url)))
    ;; SEARCH returns NIL when the marker is absent; guard against it so
    ;; callers get NIL instead of a type error from adding to NIL.
    (when mark
      (subseq url (+ mark (length marker))))))

(defun path-to-philosophy (input)
  "Given a string INPUT return a list of article titles showing the
  path from the wikipedia article found for INPUT to Philosophy.

  The last element of the list tells how the crawl ended:
    \"Philosophy\"               - the Philosophy article was reached,
    \"Cycle detected!\"          - an already visited article came up again,
    \"No links page detected!\"  - the current article yielded no link.
  When no starting article is found for INPUT the result is the
  one-element list (\"There is no article to start crawling from.\")."
  (let ((url (query-wikipedia input))
        (end (concatenate 'string *wikipedia-url* "/wiki/Philosophy"))
        ;; Visited titles, most recent first; reversed on return.
        (titles '()))
    (if (null url)
        '("There is no article to start crawling from.")
        (loop
          (when (string-equal url end)
            (return (nreverse (cons "Philosophy" titles))))
          (let ((title (title-from-url url)))
            (when (member title titles :test #'string-equal)
              (return (nreverse (cons "Cycle detected!" titles))))
            (push title titles))
          ;; The link must be checked BEFORE building the next url:
          ;; CONCATENATE always returns a string, so testing the
          ;; concatenated url for NIL can never detect a missing link.
          ;; NOTE(review): assumes GRAB-LINK returns NIL when the page
          ;; has no valid first link -- confirm against its definition.
          (let ((link (grab-link url)))
            (unless link
              (return (nreverse (cons "No links page detected!" titles))))
            (setq url (concatenate 'string *wikipedia-url* link)))))))

M packages.lisp => packages.lisp +2 -1
@@ 1,2 1,3 @@
(defpackage wikisophy.crawler
  (:use :cl))
  (:use :cl)
  (:export :path-to-philosophy))