~deimos/madcow-bot

a69978dbeae5b26520d79d7878d7e8c9f7137e7e — Chris Jones 8 years ago 9106e1b
fix modules: stocks, urban, wikihow, wikipedia, wikiquotes, youtube title scraper
M madcow/conf/defaults.py => madcow/conf/defaults.py +1 -1
@@ 112,7 112,7 @@ TASKS = [
        ]

# list of modules (from MODULES above) that only respond in private message
PRIVATE_MODULES = ['company', 'realname', 'notes', 'xray'] 
PRIVATE_MODULES = ['company', 'realname', 'notes', 'xray']

MODULE_SUPERCEDES = {}
TASK_SUPERCEDES = {}

M madcow/modules/stockquote.py => madcow/modules/stockquote.py +14 -30
@@ 2,30 2,29 @@

from madcow.util import Module, strip_html
from urllib import quote
from madcow.util.color import ColorLib

import locale
import csv
import re

class UnknownSymbol(Exception):
    """Raised when a requested ticker symbol cannot be resolved.

    The offending symbol is stored on ``symbolname`` and is also handed to
    ``Exception.__init__`` so that ``str(error)``, ``error.args`` and
    tracebacks name the symbol instead of being empty.
    """

    def __init__(self, symbolname):
        super(UnknownSymbol, self).__init__(symbolname)
        self.symbolname = symbolname

class Main(Module):
    pattern = re.compile(u'^\s*(?:stocks?|quote)\s+([ .=a-zA-Z0-9^]+)', re.I)
    require_addressing = True
    help = u'quote <symbol> - get latest stock quote'

class Yahoo(object):
    # query parameters are s: symbol n: name p: prev. close l: last trade with time (15 minute delay)
    # a mostly-accurate listing of possible parameters is available here: http://www.gummy-stuff.org/Yahoo-data.htm
    _quote_url = u'http://download.finance.yahoo.com/d/quotes.csv?s=SYMBOL&f=snpl&e=.csv'

    def __init__(self, colorlib):
        self.colorlib = colorlib
    def response(self, nick, args, kwargs):
        kwargs['req'].blockquoted = True
        return unicode(self.get_quote(args[0]))

    def get_quote(self, symbols):
        """Looks up the symbol from finance.yahoo.com, returns formatted result"""
        symbols = [quote(symbol) for symbol in symbols.split()]
        url = Yahoo._quote_url.replace(u'SYMBOL', "+".join(symbols))
        url = self._quote_url.replace(u'SYMBOL', "+".join(symbols))
        page = self.geturl(url)

        results = []


@@ 33,7 32,10 @@ class Yahoo(object):
            data = csv.reader([line]).next()
            symbol = data[0]
            name = data[1]
            trade_time, last_trade = strip_html(data[3]).split(" - ")
            try:
                trade_time, last_trade = strip_html(data[3]).split(" - ")
            except ValueError:
                continue
            last_trade = locale.atof(last_trade)
            try:
                last_close = locale.atof(data[2])


@@ 59,25 61,7 @@ class Yahoo(object):
                    color = u'green'
                else:
                    color = u'white'
                text = self.colorlib.get_color(color, text=u'%.2f (%+.2f %+.2f%%)' % (last_trade, delta, delta_perc))
                text = self.madcow.colorlib.get_color(color, text=u'%.2f (%+.2f %+.2f%%)' % (last_trade, delta, delta_perc))
                results.append(u'%s (%s) - Open: %.2f | %s: %s' % (name, symbol, last_close, trade_time, text))


        return u'\n'.join(results)


class Main(Module):
    """Stock quote module answering ``stock``/``stocks``/``quote <symbols>``."""

    # everything after the trigger word is captured as one group
    pattern = re.compile(u'^\s*(?:stocks?|quote)\s+([ .=a-zA-Z0-9^]+)', re.I)
    require_addressing = True
    help = u'quote <symbol> - get latest stock quote'

    def init(self):
        """Create the Yahoo quote fetcher, passing it ``self.madcow.colorlib``."""
        self.yahoo = Yahoo(self.madcow.colorlib)

    def response(self, nick, args, kwargs):
        """Return the quote text for the symbols in ``args[0]``.

        An ``UnknownSymbol`` raised by the lookup is converted into a
        "not found" message naming the symbol instead of propagating.
        """
        query = args[0]
        try:
            return unicode(self.yahoo.get_quote(query))
        # "as" works on Python 2.6+ and 3; the comma form is Python 2 only
        except UnknownSymbol as error:
            return u"Symbol %s not found, market may have crashed" % error.symbolname

M madcow/modules/urban.py => madcow/modules/urban.py +1 -0
@@ 5,6 5,7 @@ import re

from madcow.util import Module, strip_html
from madcow.util.text import decode
from madcow.util.http import geturlopt

RESULTS_PER_PAGE = 7


M madcow/modules/wikihow.py => madcow/modules/wikihow.py +1 -1
@@ 31,7 31,7 @@ class Main(Module):
            import traceback
            traceback.print_exc()
            results = ['i has an aerror when wikihowing. it may be down, who knows.']
        return os.linesep.join('{}: {}'.format(nick, line) for line in results if line)
        return u'\n'.join(u'{}: {}'.format(nick, line) for line in results if line)

    def getrandom(self, times=3):
        """".how (times) - Gives you random instructions from wikiHow, by default 3 steps"""

M madcow/modules/wikimedia.py => madcow/modules/wikimedia.py +82 -91
@@ 6,20 6,51 @@ from madcow.util.text import *
from urlparse import urljoin
import re

# wiki configuration
WIKIS = {'wikipedia': {
             'keys': ['wp', 'wiki', 'wikipedia'],
             'baseurl': 'http://en.wikipedia.org/',
             'kwargs': {
                 'random': '/wiki/Special:Random',
                 'search': '/wiki/Special:Search',
                 'advert': ' - Wikipedia, the free encyclopedia',
                 'error': 'Search results',
                 },
             },
         }

class WikiMedia(object):
class Main(Module):

    """Autoloaded by Madcow"""

    def make_help(wikis):
        """Build the madcow help text, one line per configured wiki.

        Each line holds the trigger keys joined with "|" (wrapped in <...>
        when there is more than one), a space if the wiki has a search
        path, the placeholder [query] if it has a random-page path or
        <query> otherwise, and finally " - search <wiki name>".
        """
        lines = []
        for name, conf in wikis.iteritems():
            several = len(conf['keys']) > 1
            pieces = ['<'] if several else []
            pieces.append('|'.join(conf['keys']))
            if several:
                pieces.append('>')
            # the separating space is only emitted for searchable wikis
            if conf['kwargs']['search']:
                pieces.append(' ')
            optional = conf['kwargs']['random']
            pieces.append('[' if optional else '<')
            pieces.append('query')
            pieces.append(']' if optional else '>')
            pieces.append(' - search ' + name)
            lines.append(''.join(pieces))
        return '\n'.join(lines)

    wikiconf = {
            'wikipedia': {
                'keys': ['wp', 'wiki', 'wikipedia'],
                'baseurl': 'http://en.wikipedia.org/',
                'kwargs': {
                    'random': '/wiki/Special:Random',
                    'search': '/wiki/Special:Search',
                    'advert': ' - Wikipedia, the free encyclopedia',
                    },
                },
            }

    pattern = Module._any
    terminate = False
    help = make_help(wikiconf)
    match_fmt = r'^\s*(?:%s)(?:\s+(.+?))?\s*$'

    citations_re = re.compile(r'\[.*?\]', re.DOTALL)
    parens_re = re.compile(r'\(.*?\)', re.DOTALL)


@@ 29,23 60,45 @@ class WikiMedia(object):
    summary_size = 400
    scripts_re = re.compile(r'<script [^>]+>.*?</script>', re.I | re.DOTALL)

    def __init__(self, baseurl, **kwargs):
        self.baseurl = baseurl
        self.__dict__.update(kwargs)
    def init(self):
        """Precompile one trigger regex per wiki listed in ``self.wikiconf``.

        Fills ``self.wikis``, mapping each wiki name to a dict holding the
        compiled ``match_re``, the wiki's ``baseurl`` and, under ``opts``,
        the ``kwargs`` dict from its configuration.
        """
        self.wikis = {}
        for name, conf in self.wikiconf.iteritems():
            # all trigger keys become one case-insensitive alternation
            alternation = '|'.join(conf['keys'])
            self.wikis[name] = dict(
                match_re=re.compile(self.match_fmt % alternation, re.I),
                baseurl=conf['baseurl'],
                opts=conf['kwargs'],
            )

    def getsummary(self, query):
        if not self.search:
    def response(self, nick, args, kwargs):
        """Answer a wiki trigger found in ``args[0]``.

        Each configured wiki's trigger regex is tried against the message.
        A trigger followed by text asks ``getsummary`` for that text; a bare
        trigger asks ``getrandom``.  The first non-empty result is returned
        prefixed with ``nick``; if nothing produces a result the method
        falls off the end and returns None.
        """
        message = args[0]
        for wiki, opts in self.wikis.iteritems():
            try:
                # search() returns None when the trigger is absent; the
                # AttributeError from .group() is what skips this wiki
                query = opts['match_re'].search(message).group(1)
                handler = self.getsummary if query else self.getrandom
                positional = (query,) if query else ()
                settings = dict(opts['opts'], baseurl=opts['baseurl'])
                res = handler(*positional, **settings)
                if res:
                    return u'%s: %s' % (nick, res)
            except AttributeError:
                continue

    def getsummary(self, query, **kwargs):
        if not kwargs['search']:
            return u"i don't know how to search this wiki!"
        url = urljoin(kwargs['baseurl'], kwargs['search'])
        opts = {'search': query, 'go': 'Go'}
        return self._getsummary(self.search_url, opts=opts)
        return self._getsummary(url, opts=opts, **kwargs)

    def getrandom(self):
        if not self.random:
    def getrandom(self, **kwargs):
        if not kwargs['random']:
            return u"i don't know where random pages are on this wiki!"
        return self._getsummary(self.random_url)
        url = urljoin(kwargs['baseurl'], kwargs['random'])
        return self._getsummary(url, **kwargs)

    def _getsummary(self, url, opts=None):
        soup, title = self._getpage(url, opts)
    def _getsummary(self, url, opts=None, **kwargs):
        soup, title = self._getpage(url, opts, **kwargs)

        spam = soup.find('div', attrs={'id': 'siteNotice'})
        if spam is not None:


@@ 70,8 123,8 @@ class WikiMedia(object):
            summary += ' ' + sentence
        return summary

    def _getpage(self, url, opts=None):
        page = self.geturl(url, referer=self.baseurl, opts=opts)
    def _getpage(self, url, opts=None, **kwargs):
        page = self.geturl(url, referer=kwargs['baseurl'], opts=opts)
        # HTMLParser doesn't handle this very well.. see:
        # http://www.crummy.com/software/BeautifulSoup/3.1-problems.html
        page = self.scripts_re.sub('', page)


@@ 79,8 132,8 @@ class WikiMedia(object):

        # get page title
        title = soup.title.string
        if self.advert and self.advert in title:
            title = title.replace(self.advert, '')
        if kwargs['advert'] and kwargs['advert'] in title:
            title = title.replace(kwargs['advert'], '')

        # remove all tabular data/sidebars
        for table in soup.findAll('table'):


@@ 109,67 162,5 @@ class WikiMedia(object):
        return soup, title

    @property
    def search_url(self):
        return urljoin(self.baseurl, self.search)

    @property
    def random_url(self):
        return urljoin(self.baseurl, self.random)


def make_help(wikis):
    """Build the madcow help text, one line per configured wiki.

    Each line holds the trigger keys joined with "|" (wrapped in <...>
    when there is more than one), a space if the wiki has a search path,
    the placeholder [query] if it has a random-page path or <query>
    otherwise, and finally " - search <wiki name>".
    """
    lines = []
    for name, conf in wikis.iteritems():
        several = len(conf['keys']) > 1
        pieces = ['<'] if several else []
        pieces.append('|'.join(conf['keys']))
        if several:
            pieces.append('>')
        # the separating space is only emitted for searchable wikis
        if conf['kwargs']['search']:
            pieces.append(' ')
        optional = conf['kwargs']['random']
        pieces.append('[' if optional else '<')
        pieces.append('query')
        pieces.append(']' if optional else '>')
        pieces.append(' - search ' + name)
        lines.append(''.join(pieces))
    return '\n'.join(lines)


class Main(Module):

    """Autoloaded by Madcow; dispatches wiki triggers to WikiMedia handlers"""

    pattern = Module._any
    terminate = False
    help = make_help(WIKIS)
    match_fmt = r'^\s*(?:%s)(?:\s+(.+?))?\s*$'

    def init(self):
        """Build a trigger regex and a WikiMedia handler for each WIKIS entry."""
        self.wikis = {}
        for name, conf in WIKIS.iteritems():
            # all trigger keys become one case-insensitive alternation
            trigger = re.compile(self.match_fmt % '|'.join(conf['keys']), re.I)
            self.wikis[name] = {
                'match_re': trigger,
                'handler': WikiMedia(conf['baseurl'], **conf['kwargs']),
            }

    def response(self, nick, args, kwargs):
        """Return "<nick>: <text>" from the first wiki whose trigger matches.

        A trigger followed by text asks the handler for a summary of that
        text; a bare trigger asks it for a random page.  Returns None when
        no wiki yields a non-empty answer.
        """
        message = args[0]
        for name, entry in self.wikis.iteritems():
            try:
                # search() returns None when the trigger is absent; the
                # AttributeError from .group() is what skips this wiki
                query = entry['match_re'].search(message).group(1)
                handler = entry['handler']
                answer = handler.getsummary(query) if query else handler.getrandom()
                if answer:
                    return u'%s: %s' % (nick, answer)
            except AttributeError:
                continue

M madcow/modules/wikiquotes.py => madcow/modules/wikiquotes.py +26 -40
@@ 1,26 1,31 @@
"""Plugin to return random quote from WikiQuotes"""

from urlparse import urljoin

import random
import re

from BeautifulSoup import BeautifulSoup
from madcow.util import strip_html, Module

_pattern = re.compile(r'^\s*(?:wikiquote|wq)\s*(?:\s+(.*?)\s*)?$', re.I)
_linebreak = re.compile(r'[\r\n]+')
_whitespace = re.compile(r'\s{2,}')
_author = u'random'
_max = 10

class Wiki(object):
class Main(Module):

    base_url = u'http://en.wikiquote.org/'
    advert = u' - Wikiquote'

    pattern = re.compile(r'^\s*(?:wikiquote|wq)\s*(?:\s+(.*?)\s*)?$', re.I)
    _linebreak = re.compile(r'[\r\n]+')
    _whitespace = re.compile(r'\s{2,}')
    _author = u'random'
    _max = 10

    """Return summary from WikiMedia projects"""
    require_addressing = True
    help = u'wikiquote - get random quote from wikiquotes'

    # site-specific details for English Wikiquote.  base_url and advert must
    # carry the Wikiquote values at this point: these assignments run after
    # the ones near the top of the class body, so Wikipedia values here would
    # override them and quotes would be fetched from Wikipedia instead.
    base_url = u'http://en.wikiquote.org/'
    random_path = u'/wiki/Special:Random'
    search_path = u'/wiki/Special:Search'
    advert = u' - Wikiquote'
    error = u'Search results'

    # size of response


@@ 35,9 40,6 @@ class Wiki(object):
    _sentence = re.compile(r'(.*?\.)\s+', re.DOTALL)
    _fix_punc = re.compile(r'\s+([,;:.])')

    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)

    def get_summary(self, query):
        soup, title = self.get_soup(query)



@@ 67,10 69,10 @@ class Wiki(object):

        # clean up rendered text
        content = strip_html(content)                 # strip markup
        content = Wiki._citations.sub(u'', content)   # remove citations
        content = Wiki._parens.sub(u'', content)      # remove parentheticals
        content = Wiki._whitespace.sub(u' ', content) # compress whitespace
        content = Wiki._fix_punc.sub(r'\1', content) # fix punctuation
        content = self._citations.sub(u'', content)   # remove citations
        content = self._parens.sub(u'', content)      # remove parentheticals
        content = self._whitespace.sub(u' ', content) # compress whitespace
        content = self._fix_punc.sub(r'\1', content) # fix punctuation
        content = content.strip()                    # strip whitespace

        # search error


@@ 79,7 81,7 @@ class Wiki(object):

        # generate summary by adding as many sentences as possible before limit
        summary = u'%s -' % title
        for sentence in Wiki._sentence.findall(content):
        for sentence in self._sentence.findall(content):
            if len(summary) + 1 + len(sentence) > self.summary_size:
                break
            summary += u' %s' % sentence


@@ 96,8 98,7 @@ class Wiki(object):
        else:
            opts = {u'search': query, u'go': u'Go'}
            url = urljoin(self.base_url, self.search_path)
        page = self.geturl(url, referer=self.base_url, opts=opts,
                      size=self.sample_size)
        page = self.geturl(url, referer=self.base_url, opts=opts, size=self.sample_size)

        # create BeautifulSoup document tree
        soup = BeautifulSoup(page)


@@ 126,32 127,17 @@ class Wiki(object):
            span.extract()
        for link in soup.findAll(u'a', text=u'IPA'):
            link.extract()
        for span in soup.findAll(u'span', attrs={u'class': Wiki._audio}):
        for span in soup.findAll(u'span', attrs={u'class': self._audio}):
            span.extract()

        return soup, title


class WikiQuotes(Wiki):

    base_url = u'http://en.wikiquote.org/'
    advert = u' - Wikiquote'


class Main(Module):

    pattern = _pattern
    require_addressing = True
    help = u'wikiquote - get random quote from wikiquotes'

    def init(self):
        self.wiki = WikiQuotes()

    def get_random_quote(self, author=_author, max=_max):
        """Return a random quote for ``author``, retrying on failed attempts.

        ``self._get_random_quote`` is called up to ``max`` times and the
        first value it returns is passed back.  An attempt that raises is
        retried; the previous handler re-raised immediately (followed by an
        unreachable ``pass``), so a second attempt was never made and the
        final error below could not be reached.

        Raises:
            Exception: when none of the ``max`` attempts returned a value.
        """
        for _ in range(max):
            try:
                return self._get_random_quote(author=author)
            except Exception:
                # only ordinary errors are retried; KeyboardInterrupt and
                # SystemExit are not Exception subclasses and still propagate
                continue
        raise Exception(u'no parseable page found :(')



@@ 161,14 147,14 @@ class Main(Module):
        contents = [unicode(part) for part in contents]
        quote = u' '.join(contents)
        quote = strip_html(quote)
        quote = _linebreak.sub(u' ', quote)
        quote = _whitespace.sub(u' ', quote)
        quote = self._linebreak.sub(u' ', quote)
        quote = self._whitespace.sub(u' ', quote)
        quote = quote.strip()
        return quote

    def _get_random_quote(self, author=_author):
        soup, title = self.wiki.get_soup(author)
        if title == self.wiki.error:
        soup, title = self.get_soup(author)
        if title == self.error:
            return u"Couldn't find quotes for that.."
        content = soup.find(u'div', attrs={u'id': u'mw-content-text'})
        uls = content.findAll(u'ul', recursive=False)

M madcow/modules/youtube.py => madcow/modules/youtube.py +2 -4
@@ 19,9 19,6 @@ class Main(Module):
    allow_threading = True
    terminate = False

    def __init__(self, bot):
        self.bot = bot

    def response(self, nick, args, kwargs):
        try:
            url = args[0]


@@ 33,8 30,9 @@ class Main(Module):
                soup = self.getsoup(url)
                title = strip_html(decode(soup.title.renderContents())).replace(u' - YouTube', u'').strip()
                if title:
                    self.bot.output(title, kwargs['req'])
                    self.madcow.output(title, kwargs['req'])
        except (KeyboardInterrupt, SystemExit):
            raise
        except:
            raise
            pass