~amirouche/ruse-babeltoy

eed0e7da9b444f99b8c0f219882294305a1425bc — Amirouche 1 year, 3 months ago ad5823c main
wip
5 files changed, 100 insertions(+), 28 deletions(-)

A babelbot.py
D babeltoy.pdf
M babeltoy.scm
M main.scm
M src/index.scm
A babelbot.py => babelbot.py +71 -0
@@ 0,0 1,71 @@
import os
import random
import time
import JustIRC


# Single module-level IRC connection; the event handlers in this file
# register themselves on it through ``@bot.on(...)``.
bot = JustIRC.IRCConnection()


def make_babeltoy():
    """Coroutine-style generator that drives the ``./babeltoy 3`` subprocess.

    Prime it once with ``send(None)`` (or ``next()``): the child process is
    spawned at that point, with ``timeout=None``, and the generator yields
    ``'ready'``.  Afterwards every ``send((query, messager))``:

    * writes ``query`` to the child as one line,
    * reads and discards the next output line,
    * parses the following line as the number of hits,
    * reads that many further lines, decodes them as UTF-8, strips them
      and passes each one to ``messager``,

    and then yields ``'ready'`` again.

    Raises:
        ValueError: if the hit-count line cannot be parsed as an integer.
    """
    # Imported here so that pexpect is only required once the generator
    # is actually started.
    import pexpect

    command = "./babeltoy 3"
    print('* Getting started...')
    child = pexpect.spawn(command, timeout=None)
    print('* Ready to babel...')
    while True:
        query, messager = yield 'ready'
        print("** {}".format(query))
        child.sendline(query)
        # The first line after a query is not a result and is dropped
        # (presumably the terminal echo of the query -- TODO confirm).
        child.readline()
        count = int(child.readline())
        # range() yields nothing for a zero or negative count, so a bogus
        # negative value can no longer turn this into an endless read loop.
        for _ in range(count):
            line = child.readline().decode('utf8').strip()
            messager(line)
            # Random pause of less than a second between hits
            # (presumably to avoid flooding the channel).
            time.sleep(random.random())


# Create the babeltoy coroutine and advance it to its first ``yield`` so it
# is ready to receive ``(query, messager)`` tuples through ``send``.
babeltoy = make_babeltoy()
next(babeltoy)

# Password sent to NickServ; read from the environment, defaulting to 'secret'.
BABELBOT_SECRET = os.getenv('BABELBOT_SECRET', 'secret')


@bot.on("connect")
def connect(e):
    """Handle the "connect" event.

    Sets the nick, sends the user packet, then messages ``nickserv`` with
    an ``identify`` command built from ``BABELBOT_SECRET``.
    """
    nick = "babelbot"
    bot.set_nick(nick)
    bot.send_user_packet(nick)
    identify = 'identify babelbot {}'.format(BABELBOT_SECRET)
    bot.send_message('nickserv', identify)


@bot.on("welcome")
def welcome(e):
    """Handle the "welcome" event by joining the bot's channels, in order."""
    for channel in ("#ruse", "#scheme", "#peacesearch"):
        bot.join_channel(channel)


@bot.on("message#")
def message(e):
    """Handle a "message#" event addressed to the bot.

    Only messages whose lower-cased text starts with ``'babelbot: '`` are
    handled.  Everything after the first ``:`` becomes the query: remaining
    colons are replaced by spaces and runs of whitespace are collapsed.
    Queries longer than 260 characters are answered with ``"meh.."``;
    otherwise the query is sent to the ``babeltoy`` coroutine together with
    a callback that posts each hit to the originating channel.
    """
    lowered = e.message.lower()
    if not lowered.startswith('babelbot: '):
        return

    # Drop the part before the first colon (the bot's nick), keep the rest.
    _, *rest = lowered.split(':')
    words = ' '.join(rest).split()
    query = ' '.join(words)

    if len(query) > 260:
        bot.send_message(e.channel, "meh..")
        return

    def reply(hit):
        bot.send_message(e.channel, hit)

    babeltoy.send((query, reply))


# Connect to the Libera.Chat server, then start the bot's run loop.
bot.connect("irc.libera.chat")
bot.run_loop()

D babeltoy.pdf => babeltoy.pdf +0 -0
M babeltoy.scm => babeltoy.scm +19 -18
@@ 6,7 6,7 @@
(import (warc))
(import (okvslite))
(import (index))
(import (pool))
;;(import (pool))


(define db #f)


@@ 86,7 86,7 @@
          (case (car chars)
            ((#\newline #\space) (loop (cdr chars)))
            (else #f)))))
  

  (define (recurse sxml)
    (cond
     ((or (null? sxml)


@@ 124,7 124,7 @@
      (append-map flatten objs))
     ((pair? objs) (list objs))
     (else '())))
  

  (define scored (filter cdr (flatten (recurse sxml))))

  (define (make-bag-of-words string boost)


@@ 155,7 155,7 @@
            (hashtable-set! bag (car keys) (fx* score boost))
            (loop (cdr keys)))))
      bag)
        

    (define (list->bag objs)
      (define out (make-hashtable equal-hash equal?))
      (let loop ((objs objs))


@@ 168,7 168,7 @@

    (list->bag (tokenize string)))

 

  (define bags (map (lambda (boost+string)
                      (make-bag-of-words (cdr boost+string)
                                         (car boost+string)))


@@ 186,7 186,7 @@
                           (hashtable-cells (car bags)))
          (loop (cdr bags))))
      bag))
  

  (merge! bags))

(define (bag->alist bag)


@@ 236,16 236,16 @@
    (<= 3 (string-length token) 255))

  (filter interesting?
          (map car 
          (map car
               (string->bag-of-words string))))

(define (index! url sxml)
  (define start (okvslite-begin db 0))
  ;; TODO: Skip if URL contains a query string
  (define uid (random-uid))
  

  (define tokens (string->tokens (sxml->human-readable-text sxml)))
  

  (define ignored (for-each (lambda (token) (index-document-frequency! db token)) tokens))

  (define (add! token)


@@ 253,7 253,7 @@
    (index-backward-add! db token uid))

  (for-each add! tokens)
  

  (okvslite-commit db 0))

(define (string-contains? string char)


@@ 304,7 304,7 @@

(define (index-search tokens)
  (define document-total (inexact (index-document-frequency-total db)))
  

  (define (lft tokens frequency token) ;; wanna be least frequent token
    (if (null? tokens)
        token


@@ 349,13 349,14 @@
      (begin
        (display "1\nmeh...\n")
        (read-query-search-print-loop))
      (let ((hits (index-search tokens)))
        (display (length hits))(newline)
        (for-each
         (lambda (a) (format #t "~3f - ~a\n" (cdr a) (car a)))
         hits)
        ;;(for-each print hits)
        (read-query-search-print-loop))))
      (begin
        (let ((hits (index-search tokens)))
          (display (length hits))(newline)
          (for-each
           (lambda (a) (format #t "~3f - ~a\n" (cdr a) (car a)))
           hits)
          ;;(for-each print hits)
          (read-query-search-print-loop)))))


(if (and (not (null? (command-line-arguments)))

M main.scm => main.scm +1 -1
@@ 10,7 10,7 @@
(define (for-each-map f p objs)
  (for-each f (map p objs)))

(define p (make-pool 5))
(define p (make-pool 23))

(time (pool-for-each-parallel-map p
                                  pk

M src/index.scm => src/index.scm +9 -9
@@ 23,18 23,18 @@
  (define (index-open filename)
    (let ((db (okvslite-new)))
      (okvslite-config db 2 (expt 1024 2))
      (okvslite-config db 4 65536)      
      (okvslite-config db 8 0) ;; no log
      ;; (okvslite-config db 11 0) ;; no multiple process
      (okvslite-config db 4 65536)
      (okvslite-config db 8 1) ;; no log
      (okvslite-config db 11 1) ;; no multiple process
      (okvslite-open db filename)
      db))

  (define (index-open-read-only filename)
    (let ((db (okvslite-new)))
      (okvslite-config db 2 (expt 1024 3))
      (okvslite-config db 4 65536)      
      (okvslite-config db 8 0) ;; no log
      ;; (okvslite-config db 11 0) ;; no multiple process
      (okvslite-config db 2 (expt 1024 2))
      (okvslite-config db 4 65536)
      (okvslite-config db 8 1) ;; no log
      (okvslite-config db 11 1) ;; no multiple process
      (okvslite-config db 16 1) ;; readonly
      (okvslite-open db filename)
      db))


@@ 104,8 104,8 @@
                  (begin (okvslite-cursor-next cursor)
                         (loop (fx+ out 1)))
                  (begin (okvslite-cursor-close cursor) out)))))))
    
  


  (define (index-backward-add! db token uid)
    (okvslite-insert db (pack *backward* token uid) #vu8(0)))