M crawler.lisp => crawler.lisp +27 -2
@@ 1,4 1,7 @@
-(defvar *wikipedia-api* "https://en.wikipedia.org/w/api.php")
+(in-package wikisophy.crawler)
+
+(defvar *wikipedia-api-url* "https://en.wikipedia.org/w/api.php")
+(defvar *wikipedia-url* "https://en.wikipedia.org")
(defun query-wikipedia (input)
"Given a string input return the url of the most relevant wikipedia
@@ 11,7 14,7 @@
(cadddr
(cl-json:decode-json-from-string
(map 'string #'code-char
- (drakma:http-request *wikipedia-api* :parameters parameters)))))))
+ (drakma:http-request *wikipedia-api-url* :parameters parameters)))))))
(defun parse-html (url)
"Given a wikipedia url parse the html into a PLUMP-DOM element"
@@ 52,3 55,25 @@
do (if (validate-link a p)
(return-from outer
(aref (lquery:$ a (attr :href)) 0)))))))
+
+(defun title-from-url (url)
+  "Given a wikipedia url return the article's title, or NIL if URL has no /wiki/ part."
+  (let ((mark (search "/wiki/" url))) ; SEARCH yields NIL when the marker is absent
+    (and mark (subseq url (+ mark (length "/wiki/")))))) ; title starts right after the marker
+
+(defun path-to-philosophy (input)
+  "Given a string as an input return a list of article titles showing the path
+  from the first wikipedia article to Philosophy. The last element is a marker
+  string when the walk ends on a cycle or on a page without a first link."
+  (let ((url (query-wikipedia input))
+        (end (concatenate 'string *wikipedia-url* "/wiki/Philosophy")))
+    (if url
+        (loop while (string-not-equal url end)
+              when (member (title-from-url url) titles-list :test #'string-equal)
+                do (return (append titles-list '("Cycle detected!")))
+              collect (title-from-url url) into titles-list
+              do (let ((link (grab-link url))) ; presumably NIL when no first link -- confirm
+                   (unless link (return (append titles-list '("No links page detected!"))))
+                   (setq url (concatenate 'string *wikipedia-url* link))) ; link is site-relative
+              finally (return (append titles-list '("Philosophy"))))
+        '("There is no article to start crawling from."))))
M packages.lisp => packages.lisp +2 -1
@@ 1,2 1,3 @@
(defpackage wikisophy.crawler
- (:use :cl))
+ (:use :cl)
+ (:export :path-to-philosophy))