~seanlynch/pythonverse

d534b84b168ffe933abc9077fbcd22d13fa2e2b8 — seanl 22 years ago bdfd37e
Now uses unicode entirely internally, with the transport module doing all
conversion to encodings expected by the server.
5 files changed, 547 insertions(+), 88 deletions(-)

M OpenVerse.py
M froggirl.py
M microhal.py
M pvui_pygame.py
M webutil.py
M OpenVerse.py => OpenVerse.py +22 -12
@@ 16,11 16,12 @@
# along with PythonVerse; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
#
import sys, os, asyncore, asynchat, socket, string, struct, stat
import sys, os, asyncore, asynchat, socket, string, struct, stat, codecs
import transutil

# Global constants are all caps; global variables start with _

ENCODING = 'ISO-8859-1'
BALLOONXOFFSET = 15
HOME = os.path.expanduser('~/.OpenVerse')
ANIMDIR = os.path.join(HOME, 'anims')


@@ 31,6 32,10 @@ OBJDIR = os.path.join(HOME, 'objects')
RIMAGEDIR = os.path.join(HOME, 'rimages')
ROOMDIR = os.path.join(HOME, 'rooms')

text_decode = codecs.lookup(ENCODING)[1]

def decode(s):
    """Return s converted to unicode using the module-wide ENCODING codec.

    text_decode is the decoder looked up for ENCODING; it returns a
    (decoded_text, consumed_length) pair, of which only the text is kept.
    """
    decoded, _consumed = text_decode(s)
    return decoded

def checkcache(filename, size):
    try: s = os.stat(filename)[stat.ST_SIZE]


@@ 167,7 172,7 @@ class ServerConnection(transutil.Connection):
    def handle_connect(self):
        size = os.stat(self.avatar_filename)[stat.ST_SIZE]
        self.write("AUTH %s %d %d %s %d %d %d %d %d\r\n" %
                  (self.nick, 320, 200, os.path.basename(self.avatar_filename), self.nx, self.ny,
                  (self.nick.encode(ENCODING, 'replace'), 320, 200, os.path.basename(self.avatar_filename), self.nx, self.ny,
                   size, self.bx, self.by))

    def handle_close(self):


@@ 239,7 244,7 @@ class ServerConnection(transutil.Connection):

    def move(self, pos):
        x, y = pos
        self.write('MOVE %s %d %d 20\r\n' % (self.nick, x, y))
        self.write('MOVE %s %d %d 20\r\n' % (self.nick.encode(ENCODING, 'replace'), x, y))

    def push(self):
	self.write('PUSH 100\r\n')


@@ 250,16 255,16 @@ class ServerConnection(transutil.Connection):
    def privmsg(self, nicks, text):
	if type(nicks) == type(''): nicks = [nicks]
	for n in nicks:
	    self.write('PRIVMSG %s %s\r\n' % (n, text))
	    self.write('PRIVMSG %s %s\r\n' % (n.encode(ENCODING, 'replace'), text))
	return self.nick

    def url(self, nicks, url):
	if type(nicks) == type(''): nicks = [nicks]
	for n in nicks:
	    self.write('URL %s %s\r\n' % (n, url))
	    self.write('URL %s %s\r\n' % (n.encode(ENCODING, 'replace'), url))

    def chat(self, text):
        self.write('CHAT %s\r\n' % text)
        self.write('CHAT %s\r\n' % text.encode(ENCODING, 'replace'))

    def set_nick(self, nick):
        self.nick = nick


@@ 331,11 336,11 @@ class ServerConnection(transutil.Connection):

    def cmd_CHAT(self, line):
        cmd, nick, text = line.split(' ', 2)
        self.client.chat(nick, text)
        self.client.chat(decode(nick), decode(text))

    def cmd_SCHAT(self, line):
        cmd, emote, nick, text = line.split(' ', 3)
        self.client.chat(nick, '*%s* %s' % (emote, text))
        self.client.chat(nick, '*%s* %s' % (emote, decode(text)))

    def cmd_MOVE(self, line):
	cmd, nick, x, y, speed = line.split()


@@ 360,6 365,7 @@ class ServerConnection(transutil.Connection):

    def cmd_AVATAR(self, line):
        cmd, nick, filename, nx, ny, size, bx, by = line.split()
	nick = decode(nick)
	nx = int(nx)
	ny = int(ny)
	size = int(size)


@@ 379,10 385,11 @@ class ServerConnection(transutil.Connection):
        
    def cmd_URL(self, line):
    	cmd, nick, text = line.split(' ', 2)
        self.client.url(nick, text)
        self.client.url(decode(nick), text)

    def cmd_NEW(self, line):
        cmd, nick, x, y, filename, nx, ny, size, bx, by = line.split()
	nick = decode(nick)
	x = int(x)
	y = int(y)
	nx = int(nx)


@@ 399,11 406,11 @@ class ServerConnection(transutil.Connection):

    def cmd_NOMORE(self, line):
        cmd, nick = line.split()
        self.client.del_avatar(nick)
        self.client.del_avatar(decode(nick))

    def cmd_EXIT_OBJ(self, line):
        cmd, name, x1, y1, x2, y2, duration, host, port = line.split()
	self.client.exit_obj(name, host, port)
	self.client.exit_obj(decode(name), host, int(port))

    def cmd_DCCGETAV(self, line):
        cmd, port, filename, size = line.split()


@@ 427,10 434,11 @@ class ServerConnection(transutil.Connection):
        
    def cmd_ROOMNAME(self, line):
        cmd, name = line.split(' ', 1)
        self.client.set_title(name)
        self.client.set_title(decode(name))

    def cmd_MOUSEOVER(self, line):
        cmd, name, x, y, image1, size1, image2, size2, flag = line.split()
	name = decode(name)
	x = int(x)
	y = int(y)
	size1 = int(size1)


@@ 449,6 457,8 @@ class ServerConnection(transutil.Connection):

    def cmd_WHOIS(self, line):
        """Handle a WHOIS line by showing it to the client as a chat message.

        The line is split into at most three fields: the command word, the
        nick, and the remaining free text (which may contain spaces).  Nick
        and text are decoded to unicode before being passed to the client.
        """
        _cmd, raw_nick, raw_text = line.split(' ', 2)
        who = decode(raw_nick)
        description = decode(raw_text)
        message = '*%s* is %s' % (who, description)
        self.client.chat(who, message)

    def cmd_PUSH(self, line):

M froggirl.py => froggirl.py +41 -11
@@ 21,7 21,7 @@

import sys, os, string, re, asyncore, codecs, traceback, whrandom, time
import OpenVerse, client
import webutil, kanjidic, microhal
import webutil,  microhal

#os.environ['WNHOME'] = '/usr/share/wordnet'
#os.environ['WNSEARCHDIR'] = '/usr/share/wordnet'


@@ 37,8 37,31 @@ languages = {'e': 'en',
	     's': 'es',
	     'j': 'ja'}

fromutf8 = codecs.lookup('UTF8')[1]
fromlatin1 = codecs.lookup('ISO-8859-1')[1]
utf8_decode = codecs.lookup('UTF8')[1]
latin1_decode = codecs.lookup('ISO-8859-1')[1]
euc_jp_decode = codecs.lookup('japanese.euc-jp')[1]

unicode_re = re.compile('#([0-9a-fA-F]{4})')
def fromlatin1(s):
    """Decode a latin1 string to unicode, expanding "#xxxx" escapes.

    Every "#" followed by exactly four hex digits is replaced by the
    unicode character with that code point.
    """
    pieces = unicode_re.split(latin1_decode(s)[0])

    # Splitting on a pattern with one capturing group leaves plain text at
    # the even indices and the captured hex digits at the odd indices.
    idx = 1
    while idx < len(pieces):
        pieces[idx] = unichr(int(pieces[idx], 16))
        idx = idx + 2

    return u''.join(pieces)

def fromutf8(s):
    """Decode a UTF-8 encoded string and return the unicode text."""
    text, _consumed = utf8_decode(s)
    return text
def fromeuc(s):
    """Decode an EUC-JP encoded string and return the unicode text."""
    text, _consumed = euc_jp_decode(s)
    return text

def pipe(c, s):
    """Run command c, feed it s on standard input, and return its output.

    All of s is written before any output is read, and the function only
    returns once the command's standard output has been read to the end.
    """
    to_child, from_child = os.popen2(c)

    to_child.write(s)
    # Closing the write end gives the command end-of-file on its input.
    to_child.close()

    captured = from_child.read()
    from_child.close()
    return captured

class HalClient(client.Client):
    commands = [('([efigpsj])((2[efigpsj])+)\s+(.+)$',


@@ 78,7 101,7 @@ class HalClient(client.Client):

    # commands.
    def do_reply(self, sentence):
        reply = self.brain.reply(sentence)
        avg_logprob, reply = self.brain.reply(sentence)
        self.brain.train(sentence)
        return reply



@@ 100,10 123,13 @@ class HalClient(client.Client):
	
    def do_babel(self, lang1, langs, phrase):
	lang1 = languages[lang1]
	r = fromlatin1(phrase)[0].encode('UTF8')
	phrase = fromlatin1(phrase)
	if lang1 == 'ja':
	    # Convert romaji to kana
	    phrase = fromeuc(pipe('./romkan.rb', phrase.encode('japanese.euc-jp', 'ignore')))
	r = phrase.encode('UTF8')
	for l in langs[1:].split('2'):
	    lang2 = languages[l]
	    print 'Translating %s from %s to %s' % (r, lang1, lang2)
	    try: r = webutil.translate(r, lang1, lang2)
	    except:
		exception, info = sys.exc_info()[:2]


@@ 114,7 140,8 @@ class HalClient(client.Client):
	        print r
	        lang1 = lang2

	return kanjidic.romajify(fromutf8(r)[0]).encode('ISO-8859-1', 'replace')
	#r = fromeuc(pipe('kakasi -Ea -Ha -Ka -Ja', fromutf8(r).encode('japanese.euc-jp', 'replace')))
	return fromutf8(r).encode('ISO-8859-1', 'replace')

    def do_google(self, phrase, groups=0):
        hits = webutil.google(phrase, groups)


@@ 131,7 158,7 @@ class HalClient(client.Client):
	    return
        images = webutil.google_image(phrase)
	# Filter out banners.
	images = filter(lambda im: float(im[1]) / float(im[2]) < 3, images)
	images = filter(lambda im: float(im[1]) / float(im[2]) < 3 and im[1] >= 20 and im[2] >= 20, images)
	if not images: return 'Nothing found.'
	imageurl = whrandom.choice(images)[0]
	filename = os.path.expanduser('~/.OpenVerse/images/google.gif')


@@ 141,9 168,12 @@ class HalClient(client.Client):

    def do_random(self, phrase):
	if whrandom.randint(1, 5) == 1:
	    r = self.do_reply(phrase)
	    self.server.chat(r)
	else: self.brain.train(phrase)
	    p, r = self.brain.reply(phrase)
	    print p, r
	    # Only send it if it's 'interesting'
	    if p > 5: self.server.chat(r)

	self.brain.train(phrase)

    def do_move(self, x, y):
        self.pos = (x, y)

M microhal.py => microhal.py +474 -58
@@ 1,12 1,412 @@
#!/usr/bin/python

import re, string, whrandom
import sys, re, string, whrandom
from math import *
from cPickle import dump, load

# Maps a keyword taken from the input sentence to the word that replaces it
# when Brain.reply chooses seeds for a reply.
#
# "you" used to be listed twice ("i", then "me").  A dict literal keeps only
# the last value for a repeated key, so the lookup always produced "me"; the
# dead "i" entry has been dropped and the effective mapping is unchanged.
SWAPLIST = {
    "i": "you",
    "me": "you",
    "mine": "yours",
    "my": "your",
    "myself": "yourself",
    "no": "yes",
    "why": "because",
    "yes": "no",
    "you": "me",
    "your": "my",
    "yours": "mine",
    "yourself": "myself",
}

# Words that Brain.reply never uses as reply keywords.
#
# Entries are lower case.  The short fragments ("s", "t", "re", "don",
# "isn", ...) are presumably what is left of contractions once the sentence
# has been split into words and non-words -- confirm against Brain.parse.
#
# Changes from the previous revision of this list:
#   * "largets" -> "largest" and "thouroughly" -> "thoroughly"; the
#     misspelled entries could never match the real words.
#   * repeated entries ("it", "let", "that", "what", "where", "who") are
#     listed once; membership tests are unaffected.
STOPLIST = [
    "s", "t",
    "a", "ability", "able", "about", "absolute", "absolutely", "across", "actual",
    "actually", "after", "afternoon", "again", "against", "ago", "agree", "all",
    "almost", "along", "already", "although", "always", "am", "an", "and",
    "another", "any", "anyhow", "anything", "anyway", "are", "aren", "around",
    "as", "at", "away",
    "back", "bad", "be", "been", "before", "behind", "being", "believe",
    "belong", "best", "better", "between", "big", "bigger", "biggest", "bit",
    "both", "buddy", "but", "by",
    "call", "called", "calling", "came", "can", "cannot", "care", "caring",
    "case", "catch", "caught", "certain", "certainly", "change", "close", "closer",
    "come", "coming", "common", "constant", "constantly", "could", "current",
    "day", "days", "derived", "describe", "describes", "determine", "determines", "did",
    "didn", "do", "does", "doesn", "doing", "don", "done", "doubt",
    "down",
    "each", "earlier", "early", "else", "enjoy", "especially", "even", "ever",
    "every", "everybody", "everyone", "everything",
    "fact", "fair", "fairly", "far", "fellow", "few", "find", "fine",
    "for", "form", "found", "from", "full", "further",
    "gave", "get", "getting", "give", "given", "giving", "go", "going",
    "gone", "good", "got", "gotten", "great",
    "had", "has", "hasn", "have", "haven", "having", "held", "here",
    "high", "hold", "holding", "how",
    "if", "in", "indeed", "inside", "instead", "into", "is", "isn",
    "it", "its",
    "just",
    "keep", "kind", "knew", "know", "known",
    "large", "larger", "largest", "last", "late", "later", "least", "less",
    "let", "level", "likes", "little", "long", "longer", "look", "looked",
    "looking", "looks", "low",
    "made", "make", "making", "many", "mate", "may", "maybe", "mean",
    "meet", "mention", "mere", "might", "moment", "more", "morning", "most",
    "move", "much", "must",
    "near", "nearer", "never", "next", "nice", "nobody", "none", "noon",
    "noone", "not", "note", "nothing", "now",
    "obvious", "of", "off", "on", "once", "only", "onto", "opinion",
    "or", "other", "our", "out", "over", "own",
    "part", "particular", "particularly", "perhaps", "person", "piece", "place", "pleasant",
    "please", "popular", "prefer", "pretty", "put",
    "quite",
    "real", "really", "receive", "received", "recent", "recently", "related", "result",
    "resulting", "results",
    "said", "same", "saw", "say", "saying", "see", "seem", "seemed",
    "seems", "seen", "seldom", "sense", "set", "several", "shall", "short",
    "shorter", "should", "show", "shows", "simple", "simply", "small", "so",
    "some", "someone", "something", "sometime", "sometimes", "somewhere", "sort", "sorts",
    "spend", "spent", "still", "stuff", "such", "suggest", "suggestion", "suppose",
    "sure", "surely", "surround", "surrounds",
    "take", "taken", "taking", "tell", "than", "thank", "thanks", "that",
    "thats", "the", "their", "them", "then", "there", "therefore", "these",
    "they", "thing", "things", "this", "those", "though", "thoughts", "thoroughly",
    "through", "tiny", "to", "today", "together", "told", "tomorrow", "too",
    "total", "totally", "touch", "try", "twice",
    "under", "understand", "understood", "until", "up", "us", "used", "using",
    "usually",
    "various", "very",
    "want", "wanted", "wants", "was", "watch", "way", "ways", "we",
    "re", "well", "went", "were", "what", "whatever", "whats", "when",
    "where", "which", "while", "whilst", "who", "whom", "will", "wish",
    "with", "within", "wonder", "wonderful", "worse", "worst", "would", "wrong",
    "yesterday", "yet",
]

class Entry:
    def __init__(self, key):
        self.key = key
    def __init__(self):
        self.freqs = {}
	self.count = 0



@@ 30,7 430,10 @@ class Entry:
	n = whrandom.randint(1, self.count)
	for symbol, freq in self.freqs.items():
	    total = total + freq
	    if n <= total: return symbol, -log(float(freq)/float(self.count))/log(2)
	    if n <= total: return symbol

    def prob(self, symbol):
        """Return the fraction of this entry's total count that belongs to symbol.

        Raises KeyError if symbol has no frequency recorded in this entry.
        """
        occurrences = float(self.freqs[symbol])
        total = float(self.count)
        return occurrences / total


class Brain:


@@ 38,8 441,7 @@ class Brain:
    whitespace_re = re.compile('\s+')

    def __init__(self):
        self.leaders = {}
	self.followers = {}
        self.markov = {}
 
    def parse(self, sentence):
        """Convert all whitespace to a single space, split up the sentence into alternating words and non-words, and remove any empty symbols."""


@@ 47,25 449,19 @@ class Brain:

    def train(self, sentence):
	# Split up the sentence and add sentinels at the beginning and end
        sentence = [None, None, None, None] + self.parse(sentence) + [None, None, None, None]
	for i in range(len(sentence) - 4):
	    symbol, sym1, sym2, sym3, sym4 = sentence[i:i+5]
        sentence = [None, None, None, None, None] + self.parse(sentence) + [None, None, None, None]
	for i in range(len(sentence) - 5):
	    symbol, sym1, sym2, sym3, sym4, sym5 = sentence[i:i+6]

            key = symbol, sym1, sym2, sym3
	    try: entry = self.followers[key]
            key = sym1, sym2, sym3, sym4
	    try: leaders, followers = self.markov[key]
	    except KeyError: 
	        entry = Entry(key)
		self.followers[key] = entry
            
	    entry.add_symbol(sym4)

	    key = sym1, sym2, sym3, sym4
	    try: entry = self.leaders[key]
	    except KeyError:
	        entry = Entry(key)
		self.leaders[key] = entry
	        leaders = Entry()
		followers = Entry()
		self.markov[key] = (leaders, followers)

	    entry.add_symbol(symbol)
	    leaders.add_symbol(symbol)
	    followers.add_symbol(sym5)

    def train_from_file(self, file):
        if type(file) == type(''): file = open(file)


@@ 74,57 470,65 @@ class Brain:
	    if line[0] != '#': self.train(line)
	    line = file.readline()

    def generate_replies(self, keywords, n=10):
        """Generate a random sentence from a keyword."""
	# Initial seed is the word preceded by randomly selected preceder symbols
	# Pick one seed per keyword
	seeds = []
	for keyword in keywords:
	    candidates = filter(lambda key, k=keyword: key[3] == k, self.followers.keys())
	    if candidates: seeds.append(whrandom.choice(candidates))

	if not seeds: 
	    # Pick n random seeds
	    keys = self.followers.keys()
	    for i in range(n): seeds.append(whrandom.choice(keys))

        replies = []
	for i in range(n):
	    seed = whrandom.choice(seeds)
	    replies.append(self.generate_sentence(list(seed)))

	return replies

    def generate_sentence(self, sentence):
        """Destructively modifies the seed!"""
	total_entropy = 0.0
	symbols = 0
	while sentence[-1]: 
	    symbol, entropy = self.followers[tuple(sentence[-4:])].choose()
	    symbol = self.markov[tuple(sentence[-4:])][1].choose()
	    sentence.append(symbol)
	    total_entropy = total_entropy + entropy
	    symbols = symbols + 1
	
	# Generate the beginning of the sentence.
	while sentence[0]:
	    symbol, entropy = self.leaders[tuple(sentence[:4])].choose()
	    symbol = self.markov[tuple(sentence[:4])][0].choose()
	    sentence = [symbol] + sentence
	    total_entropy = total_entropy + entropy
	    symbols = symbols + 1

        # Strip sentinels
	while not sentence[0]: del sentence[0] 
	while not sentence[-1]: del sentence[-1]

	return total_entropy / symbols, sentence
	return sentence
        
    def reply(self, sentence):
        # Pick only word symbols that are in the frequency dict
        keywords = filter(lambda k: k[0] in string.letters, self.parse(sentence))
	# Generate 10 replies for now
        candidates = self.generate_replies(keywords, 10)
	candidates.sort()
	return ''.join(candidates[-1][1]).capitalize()
        keywords = []
	for k in self.parse(sentence):
	    if k[0] in string.letters and k not in \
	        STOPLIST:
		try: k = SWAPLIST[k]
		except KeyError: pass
		if k not in keywords: keywords.append(k)
	    
        keys = self.markov.keys()
	candidates = []
	for keyword in keywords:
	    seeds = filter(lambda s, k=keyword: s[3] == k, keys)
	    if seeds:
	        # generate 10 replies per keyword
	        for i in range(10):
	            candidate = self.generate_sentence(list(whrandom.choice(seeds)))
		    # Calculate the probability of each keywords' appearing
		    # in its position
		    # This is a total hack.
		    total_logprob = 0
		    num_keywords = 0
		    key = [None, None, None, None]
		    for symbol in candidate:
		        if symbol in keywords:
			    total_logprob = total_logprob - \
			        log(self.markov[tuple(key)][1].prob(symbol))
			    num_keywords = num_keywords + 1

			del key[0]
			key.append(symbol)

		    # Insert them in random order.
		    candidates.insert(whrandom.randint(0, len(candidates)), (total_logprob/num_keywords, candidate))


	if not candidates:
	    return 0.0, ''.join(self.generate_sentence(list(whrandom.choice(keys)))).capitalize()

	candidates.sort(lambda a, b: cmp(a[0], b[0]))
	avg_logprob, sentence = candidates[-1]
	return avg_logprob, ''.join(sentence).capitalize()
        
    def save(self, file):
        if type(file) == type(''): file = open(file, 'w')


@@ 135,3 539,15 @@ def load_brain(file):
    if type(file) == type(''): file = open(file, 'r')
    return load(file)


def main(argv):
    """Train a new Brain from a text file and save it.

    argv[1] is the file to train from and argv[2] is the file to save the
    trained brain to.  Progress messages go to standard error.

    If fewer than two paths are given, a usage message is written to
    standard error and the process exits with status 2 instead of failing
    with an IndexError part-way through.
    """
    if len(argv) < 3:
        program = 'microhal.py'
        if argv: program = argv[0]
        sys.stderr.write('Usage: %s TRAINING_FILE BRAIN_FILE\n' % program)
        sys.exit(2)

    # NOTE(review): the module imports itself here, presumably so the saved
    # objects refer to microhal.Brain rather than __main__.Brain when this
    # file is run as a script -- confirm before removing.
    import microhal
    b = microhal.Brain()
    print >> sys.stderr, 'Training from %s...' % argv[1]
    b.train_from_file(argv[1])
    print >> sys.stderr, 'Saving to %s...' % argv[2]
    b.save(argv[2])
    print >> sys.stderr, 'Done.'

if __name__ == '__main__': main(sys.argv) 


M pvui_pygame.py => pvui_pygame.py +3 -1
@@ 1091,7 1091,7 @@ class Client:
                        except transutil.HandlerError, info: self.debug(info)
                        return
                
                self.server.chat(text.encode('utf-8'))
                self.server.chat(text)
            else: self.redraw(self.entry.insert(event.unicode))
        elif event.type == MOUSEBUTTONDOWN:
	    if self.whichmouseover == '':


@@ 1112,6 1112,8 @@ class Client:

    # Transport-called functions
    
    def push(self, x, y, speed):
        """Accept a push notification from the transport; this is a no-op.

        The arguments are ignored and nothing is returned.
        """
        return None

    def background_image(self, image):
        """Change the background"""
        self.background = image

M webutil.py => webutil.py +7 -6
@@ 1,4 1,4 @@
import urllib, re, string, Image, os
import urllib, re, string, os
from htmlentitydefs import entitydefs

babel_regex = re.compile('name="q">([^<]+)</textarea>', re.MULTILINE)


@@ 46,8 46,10 @@ def metar(station):

image_regex = re.compile('<img src=(/images\?q=tbn:\S+) width=(\d+) height=(\d+)')

def google_image(phrase):
   data = urllib.urlopen('http://images.google.com/images?q=%s&imgsafe=off' % urllib.quote_plus(phrase)).read()
def google_image(phrase, safe=1):
   if safe: imgsafe = 'on'
   else: imgsafe = 'off'
   data = urllib.urlopen('http://images.google.com/images?q=%s&imgsafe=%s' % (urllib.quote_plus(phrase), imgsafe)).read()
   images = []
   while data:
       m = image_regex.search(data)


@@ 57,9 59,8 @@ def google_image(phrase):
   return images

def image(url, filename):
    localfile = urllib.urlretrieve(url)[0]
    im = Image.open(localfile).convert('P', palette=Image.ADAPTIVE)
    im.save(filename)
    urllib.urlretrieve(url, 'google.jpg')[0]
    os.spawnlp(os.P_WAIT, 'convert', 'convert', 'google.jpg', filename)
    urllib.urlcleanup()