d534b84b168ffe933abc9077fbcd22d13fa2e2b8 — seanl 22 years ago bdfd37e
Now uses Unicode entirely internally, with the transport module doing all
conversion to the encodings expected by the server.
5 files changed, 547 insertions(+), 88 deletions(-)

M OpenVerse.py
M froggirl.py
M microhal.py
M pvui_pygame.py
M webutil.py
M OpenVerse.py => OpenVerse.py +22 -12
@@ 16,11 16,12 @@
# along with PythonVerse; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
import sys, os, asyncore, asynchat, socket, string, struct, stat
import sys, os, asyncore, asynchat, socket, string, struct, stat, codecs
import transutil

# Global constants are all caps; global variables start with _

ENCODING = 'ISO-8859-1'
HOME = os.path.expanduser('~/.OpenVerse')
ANIMDIR = os.path.join(HOME, 'anims')

@@ 31,6 32,10 @@ OBJDIR = os.path.join(HOME, 'objects')
RIMAGEDIR = os.path.join(HOME, 'rimages')
ROOMDIR = os.path.join(HOME, 'rooms')

text_decode = codecs.lookup(ENCODING)[1]

def decode(s):
    """Decode the string s with the ENCODING codec and return the text.

    The codec's decoder returns a (text, length consumed) pair; only the
    text part is handed back to the caller.
    """
    text, _consumed = text_decode(s)
    return text

def checkcache(filename, size):
    try: s = os.stat(filename)[stat.ST_SIZE]

@@ 167,7 172,7 @@ class ServerConnection(transutil.Connection):
    def handle_connect(self):
        size = os.stat(self.avatar_filename)[stat.ST_SIZE]
        self.write("AUTH %s %d %d %s %d %d %d %d %d\r\n" %
                  (self.nick, 320, 200, os.path.basename(self.avatar_filename), self.nx, self.ny,
                  (self.nick.encode(ENCODING, 'replace'), 320, 200, os.path.basename(self.avatar_filename), self.nx, self.ny,
                   size, self.bx, self.by))

    def handle_close(self):

@@ 239,7 244,7 @@ class ServerConnection(transutil.Connection):

    def move(self, pos):
        x, y = pos
        self.write('MOVE %s %d %d 20\r\n' % (self.nick, x, y))
        self.write('MOVE %s %d %d 20\r\n' % (self.nick.encode(ENCODING, 'replace'), x, y))

    def push(self):
	self.write('PUSH 100\r\n')

@@ 250,16 255,16 @@ class ServerConnection(transutil.Connection):
    def privmsg(self, nicks, text):
	if type(nicks) == type(''): nicks = [nicks]
	for n in nicks:
	    self.write('PRIVMSG %s %s\r\n' % (n, text))
	    self.write('PRIVMSG %s %s\r\n' % (n.encode(ENCODING, 'replace'), text))
	return self.nick

    def url(self, nicks, url):
	if type(nicks) == type(''): nicks = [nicks]
	for n in nicks:
	    self.write('URL %s %s\r\n' % (n, url))
	    self.write('URL %s %s\r\n' % (n.encode(ENCODING, 'replace'), url))

    def chat(self, text):
        self.write('CHAT %s\r\n' % text)
        self.write('CHAT %s\r\n' % text.encode(ENCODING, 'replace'))

    def set_nick(self, nick):
        self.nick = nick

@@ 331,11 336,11 @@ class ServerConnection(transutil.Connection):

    def cmd_CHAT(self, line):
        cmd, nick, text = line.split(' ', 2)
        self.client.chat(nick, text)
        self.client.chat(decode(nick), decode(text))

    def cmd_SCHAT(self, line):
        cmd, emote, nick, text = line.split(' ', 3)
        self.client.chat(nick, '*%s* %s' % (emote, text))
        self.client.chat(nick, '*%s* %s' % (emote, decode(text)))

    def cmd_MOVE(self, line):
	cmd, nick, x, y, speed = line.split()

@@ 360,6 365,7 @@ class ServerConnection(transutil.Connection):

    def cmd_AVATAR(self, line):
        cmd, nick, filename, nx, ny, size, bx, by = line.split()
	nick = decode(nick)
	nx = int(nx)
	ny = int(ny)
	size = int(size)

@@ 379,10 385,11 @@ class ServerConnection(transutil.Connection):
    def cmd_URL(self, line):
    	cmd, nick, text = line.split(' ', 2)
        self.client.url(nick, text)
        self.client.url(decode(nick), text)

    def cmd_NEW(self, line):
        cmd, nick, x, y, filename, nx, ny, size, bx, by = line.split()
	nick = decode(nick)
	x = int(x)
	y = int(y)
	nx = int(nx)

@@ 399,11 406,11 @@ class ServerConnection(transutil.Connection):

    def cmd_NOMORE(self, line):
        cmd, nick = line.split()

    def cmd_EXIT_OBJ(self, line):
        cmd, name, x1, y1, x2, y2, duration, host, port = line.split()
	self.client.exit_obj(name, host, port)
	self.client.exit_obj(decode(name), host, int(port))

    def cmd_DCCGETAV(self, line):
        cmd, port, filename, size = line.split()

@@ 427,10 434,11 @@ class ServerConnection(transutil.Connection):
    def cmd_ROOMNAME(self, line):
        cmd, name = line.split(' ', 1)

    def cmd_MOUSEOVER(self, line):
        cmd, name, x, y, image1, size1, image2, size2, flag = line.split()
	name = decode(name)
	x = int(x)
	y = int(y)
	size1 = int(size1)

@@ 449,6 457,8 @@ class ServerConnection(transutil.Connection):

    def cmd_WHOIS(self, line):
        """Handle a WHOIS line by showing "*nick* is text" as a chat message.

        The line is split into command, nick and the remaining text; nick
        and text are decoded before being passed to the client.
        """
        _cmd, raw_nick, raw_text = line.split(' ', 2)
        nick = decode(raw_nick)
        text = decode(raw_text)
        message = '*%s* is %s' % (nick, text)
        self.client.chat(nick, message)

    def cmd_PUSH(self, line):

M froggirl.py => froggirl.py +41 -11
@@ 21,7 21,7 @@

import sys, os, string, re, asyncore, codecs, traceback, whrandom, time
import OpenVerse, client
import webutil, kanjidic, microhal
import webutil,  microhal

#os.environ['WNHOME'] = '/usr/share/wordnet'
#os.environ['WNSEARCHDIR'] = '/usr/share/wordnet'

@@ 37,8 37,31 @@ languages = {'e': 'en',
	     's': 'es',
	     'j': 'ja'}

fromutf8 = codecs.lookup('UTF8')[1]
fromlatin1 = codecs.lookup('ISO-8859-1')[1]
utf8_decode = codecs.lookup('UTF8')[1]
latin1_decode = codecs.lookup('ISO-8859-1')[1]
euc_jp_decode = codecs.lookup('japanese.euc-jp')[1]

unicode_re = re.compile('#([0-9a-fA-F]{4})')
def fromlatin1(s):
    """Translate a string from latin1 and convert "#xxxx" to unicode.

    Every "#" followed by exactly four hex digits is replaced by the
    character with that code point; everything else is decoded as latin1.
    """
    # Splitting on a pattern with one capture group leaves the literal
    # text at even indexes and the captured hex digits at odd indexes.
    parts = unicode_re.split(latin1_decode(s)[0])
    for i in range(1, len(parts), 2):
        parts[i] = unichr(int(parts[i], 16))
    return u''.join(parts)

def fromutf8(s):
    """Decode the UTF-8 string s and return the resulting text."""
    text, _consumed = utf8_decode(s)
    return text
def fromeuc(s):
    """Decode the EUC-JP string s and return the resulting text."""
    text, _consumed = euc_jp_decode(s)
    return text

def pipe(c, s):
    """Pass a string to a pipe and return the result.

    c is the command to run and s is the string written to its standard
    input; whatever the command writes to standard output is returned.
    """
    stdin, stdout = os.popen2(c)
    try:
        # Feed the input, then close our end so the command sees EOF.
        # Without this the command never receives s at all.
        stdin.write(s)
    finally:
        stdin.close()
    try:
        return stdout.read()
    finally:
        stdout.close()

class HalClient(client.Client):
    commands = [('([efigpsj])((2[efigpsj])+)\s+(.+)$',

@@ 78,7 101,7 @@ class HalClient(client.Client):

    # commands.
    def do_reply(self, sentence):
        reply = self.brain.reply(sentence)
        avg_logprob, reply = self.brain.reply(sentence)
        return reply

@@ 100,10 123,13 @@ class HalClient(client.Client):
    def do_babel(self, lang1, langs, phrase):
	lang1 = languages[lang1]
	r = fromlatin1(phrase)[0].encode('UTF8')
	phrase = fromlatin1(phrase)
	if lang1 == 'ja':
	    # Convert romaji to kana
	    phrase = fromeuc(pipe('./romkan.rb', phrase.encode('japanese.euc-jp', 'ignore')))
	r = phrase.encode('UTF8')
	for l in langs[1:].split('2'):
	    lang2 = languages[l]
	    print 'Translating %s from %s to %s' % (r, lang1, lang2)
	    try: r = webutil.translate(r, lang1, lang2)
		exception, info = sys.exc_info()[:2]

@@ 114,7 140,8 @@ class HalClient(client.Client):
	        print r
	        lang1 = lang2

	return kanjidic.romajify(fromutf8(r)[0]).encode('ISO-8859-1', 'replace')
	#r = fromeuc(pipe('kakasi -Ea -Ha -Ka -Ja', fromutf8(r).encode('japanese.euc-jp', 'replace')))
	return fromutf8(r).encode('ISO-8859-1', 'replace')

    def do_google(self, phrase, groups=0):
        hits = webutil.google(phrase, groups)

@@ 131,7 158,7 @@ class HalClient(client.Client):
        images = webutil.google_image(phrase)
	# Filter out banners.
	images = filter(lambda im: float(im[1]) / float(im[2]) < 3, images)
	images = filter(lambda im: float(im[1]) / float(im[2]) < 3 and im[1] >= 20 and im[2] >= 20, images)
	if not images: return 'Nothing found.'
	imageurl = whrandom.choice(images)[0]
	filename = os.path.expanduser('~/.OpenVerse/images/google.gif')

@@ 141,9 168,12 @@ class HalClient(client.Client):

    def do_random(self, phrase):
	if whrandom.randint(1, 5) == 1:
	    r = self.do_reply(phrase)
	else: self.brain.train(phrase)
	    p, r = self.brain.reply(phrase)
	    print p, r
	    # Only send it if it's 'interesting'
	    if p > 5: self.server.chat(r)


    def do_move(self, x, y):
        self.pos = (x, y)

M microhal.py => microhal.py +474 -58
@@ 1,12 1,412 @@

import re, string, whrandom
import sys, re, string, whrandom
from math import *
from cPickle import dump, load

# Word substitutions looked up by exact key (SWAPLIST[word]).
# NOTE: "you" used to be listed twice ("you": "i" and "you": "me").  A dict
# keeps only the last value for a repeated key, so "you": "i" never took
# effect; only the effective mapping is kept here.
SWAPLIST = {
    "i": "you",
    "me": "you",
    "mine": "yours",
    "my": "your",
    "myself": "yourself",
    "no": "yes",
    "why": "because",
    "yes": "no",
    "you": "me",
    "your": "my",
    "yours": "mine",
    "yourself": "myself",
}


class Entry:
    def __init__(self, key):
        self.key = key
    def __init__(self):
        self.freqs = {}
	self.count = 0

@@ 30,7 430,10 @@ class Entry:
	n = whrandom.randint(1, self.count)
	for symbol, freq in self.freqs.items():
	    total = total + freq
	    if n <= total: return symbol, -log(float(freq)/float(self.count))/log(2)
	    if n <= total: return symbol

    def prob(self, symbol):
        """Return the share of this entry's total count held by symbol.

        Raises KeyError when symbol has no recorded frequency.
        """
        occurrences = float(self.freqs[symbol])
        total = float(self.count)
        return occurrences / total

class Brain:

@@ 38,8 441,7 @@ class Brain:
    whitespace_re = re.compile('\s+')

    def __init__(self):
        self.leaders = {}
	self.followers = {}
        self.markov = {}
    def parse(self, sentence):
        """Convert all whitespace to a single space, split up the sentence into alternating words and non-words, and remove any empty symbols."""

@@ 47,25 449,19 @@ class Brain:

    def train(self, sentence):
	# Split up the sentence and add sentinels at the beginning and end
        sentence = [None, None, None, None] + self.parse(sentence) + [None, None, None, None]
	for i in range(len(sentence) - 4):
	    symbol, sym1, sym2, sym3, sym4 = sentence[i:i+5]
        sentence = [None, None, None, None, None] + self.parse(sentence) + [None, None, None, None]
	for i in range(len(sentence) - 5):
	    symbol, sym1, sym2, sym3, sym4, sym5 = sentence[i:i+6]

            key = symbol, sym1, sym2, sym3
	    try: entry = self.followers[key]
            key = sym1, sym2, sym3, sym4
	    try: leaders, followers = self.markov[key]
	    except KeyError: 
	        entry = Entry(key)
		self.followers[key] = entry

	    key = sym1, sym2, sym3, sym4
	    try: entry = self.leaders[key]
	    except KeyError:
	        entry = Entry(key)
		self.leaders[key] = entry
	        leaders = Entry()
		followers = Entry()
		self.markov[key] = (leaders, followers)


    def train_from_file(self, file):
        if type(file) == type(''): file = open(file)

@@ 74,57 470,65 @@ class Brain:
	    if line[0] != '#': self.train(line)
	    line = file.readline()

    def generate_replies(self, keywords, n=10):
        """Generate a random sentence from a keyword."""
	# Initial seed is the word preceded by randomly selected preceder symbols
	# Pick one seed per keyword
	seeds = []
	for keyword in keywords:
	    candidates = filter(lambda key, k=keyword: key[3] == k, self.followers.keys())
	    if candidates: seeds.append(whrandom.choice(candidates))

	if not seeds: 
	    # Pick n random seeds
	    keys = self.followers.keys()
	    for i in range(n): seeds.append(whrandom.choice(keys))

        replies = []
	for i in range(n):
	    seed = whrandom.choice(seeds)

	return replies

    def generate_sentence(self, sentence):
        """Destructively modifies the seed!"""
	total_entropy = 0.0
	symbols = 0
	while sentence[-1]: 
	    symbol, entropy = self.followers[tuple(sentence[-4:])].choose()
	    symbol = self.markov[tuple(sentence[-4:])][1].choose()
	    total_entropy = total_entropy + entropy
	    symbols = symbols + 1
	# Generate the beginning of the sentence.
	while sentence[0]:
	    symbol, entropy = self.leaders[tuple(sentence[:4])].choose()
	    symbol = self.markov[tuple(sentence[:4])][0].choose()
	    sentence = [symbol] + sentence
	    total_entropy = total_entropy + entropy
	    symbols = symbols + 1

        # Strip sentinels
	while not sentence[0]: del sentence[0] 
	while not sentence[-1]: del sentence[-1]

	return total_entropy / symbols, sentence
	return sentence
    def reply(self, sentence):
        # Pick only word symbols that are in the frequency dict
        keywords = filter(lambda k: k[0] in string.letters, self.parse(sentence))
	# Generate 10 replies for now
        candidates = self.generate_replies(keywords, 10)
	return ''.join(candidates[-1][1]).capitalize()
        keywords = []
	for k in self.parse(sentence):
	    if k[0] in string.letters and k not in \
		try: k = SWAPLIST[k]
		except KeyError: pass
		if k not in keywords: keywords.append(k)
        keys = self.markov.keys()
	candidates = []
	for keyword in keywords:
	    seeds = filter(lambda s, k=keyword: s[3] == k, keys)
	    if seeds:
	        # generate 10 replies per keyword
	        for i in range(10):
	            candidate = self.generate_sentence(list(whrandom.choice(seeds)))
		    # Calculate the probability of each keywords' appearing
		    # in its position
		    # This is a total hack.
		    total_logprob = 0
		    num_keywords = 0
		    key = [None, None, None, None]
		    for symbol in candidate:
		        if symbol in keywords:
			    total_logprob = total_logprob - \
			    num_keywords = num_keywords + 1

			del key[0]

		    # Insert them in random order.
		    candidates.insert(whrandom.randint(0, len(candidates)), (total_logprob/num_keywords, candidate))

	if not candidates:
	    return 0.0, ''.join(self.generate_sentence(list(whrandom.choice(keys)))).capitalize()

	candidates.sort(lambda a, b: cmp(a[0], b[0]))
	avg_logprob, sentence = candidates[-1]
	return avg_logprob, ''.join(sentence).capitalize()
    def save(self, file):
        if type(file) == type(''): file = open(file, 'w')

@@ 135,3 539,15 @@ def load_brain(file):
    if type(file) == type(''): file = open(file, 'r')
    return load(file)

def main(argv):
    """Train a fresh Brain from the file argv[1] and save it to argv[2].

    Progress messages go to standard error.
    """
    # NOTE(review): importing the module by name here presumably makes the
    # saved brain reference microhal.Brain rather than __main__.Brain when
    # this file is run as a script -- confirm against save()/load_brain().
    import microhal
    b = microhal.Brain()
    print >> sys.stderr, 'Training from %s...' % argv[1]
    b.train_from_file(argv[1])
    print >> sys.stderr, 'Saving to %s...' % argv[2]
    b.save(argv[2])
    print >> sys.stderr, 'Done.'

if __name__ == '__main__': main(sys.argv) 

M pvui_pygame.py => pvui_pygame.py +3 -1
@@ 1091,7 1091,7 @@ class Client:
                        except transutil.HandlerError, info: self.debug(info)
            else: self.redraw(self.entry.insert(event.unicode))
        elif event.type == MOUSEBUTTONDOWN:
	    if self.whichmouseover == '':

@@ 1112,6 1112,8 @@ class Client:

    # Transport-called functions
    def push(self, x, y, speed): pass  # transport callback; this client does nothing on a push

    def background_image(self, image):
        """Change the background"""
        self.background = image

M webutil.py => webutil.py +7 -6
@@ 1,4 1,4 @@
import urllib, re, string, Image, os
import urllib, re, string, os
from htmlentitydefs import entitydefs

babel_regex = re.compile('name="q">([^<]+)</textarea>', re.MULTILINE)

@@ 46,8 46,10 @@ def metar(station):

image_regex = re.compile('<img src=(/images\?q=tbn:\S+) width=(\d+) height=(\d+)')

def google_image(phrase):
   data = urllib.urlopen('http://images.google.com/images?q=%s&imgsafe=off' % urllib.quote_plus(phrase)).read()
def google_image(phrase, safe=1):
   if safe: imgsafe = 'on'
   else: imgsafe = 'off'
   data = urllib.urlopen('http://images.google.com/images?q=%s&imgsafe=%s' % (urllib.quote_plus(phrase), imgsafe)).read()
   images = []
   while data:
       m = image_regex.search(data)

@@ 57,9 59,8 @@ def google_image(phrase):
   return images

def image(url, filename):
    localfile = urllib.urlretrieve(url)[0]
    im = Image.open(localfile).convert('P', palette=Image.ADAPTIVE)
    urllib.urlretrieve(url, 'google.jpg')[0]
    os.spawnlp(os.P_WAIT, 'convert', 'convert', 'google.jpg', filename)