pythonverse

Python-based client for OpenVerse with extra features
git clone https://code.literati.org/pythonverse.git
Log | Files | Refs | README | LICENSE

commit d534b84b168ffe933abc9077fbcd22d13fa2e2b8
parent bdfd37ebcee0f36b14623d3b5a7faf5428995196
Author: seanl <seanl>
Date:   Thu, 28 Mar 2002 23:57:10 +0000

Now uses unicode entirely internally, with the transport module doing all
conversion to encodings expected by the server.

Diffstat:
M OpenVerse.py | 34 ++++++++++++++++++++++------------
M froggirl.py | 52 +++++++++++++++++++++++++++++++++++++++++-----------
M microhal.py | 532 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---------
M pvui_pygame.py | 4 +++-
M webutil.py | 13 +++++++------
5 files changed, 547 insertions(+), 88 deletions(-)

diff --git a/OpenVerse.py b/OpenVerse.py @@ -16,11 +16,12 @@ # along with PythonVerse; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # -import sys, os, asyncore, asynchat, socket, string, struct, stat +import sys, os, asyncore, asynchat, socket, string, struct, stat, codecs import transutil # Global constants are all caps; global variables start with _ +ENCODING = 'ISO-8859-1' BALLOONXOFFSET = 15 HOME = os.path.expanduser('~/.OpenVerse') ANIMDIR = os.path.join(HOME, 'anims') @@ -31,6 +32,10 @@ OBJDIR = os.path.join(HOME, 'objects') RIMAGEDIR = os.path.join(HOME, 'rimages') ROOMDIR = os.path.join(HOME, 'rooms') +text_decode = codecs.lookup(ENCODING)[1] + +def decode(s): + return text_decode(s)[0] def checkcache(filename, size): try: s = os.stat(filename)[stat.ST_SIZE] @@ -167,7 +172,7 @@ class ServerConnection(transutil.Connection): def handle_connect(self): size = os.stat(self.avatar_filename)[stat.ST_SIZE] self.write("AUTH %s %d %d %s %d %d %d %d %d\r\n" % - (self.nick, 320, 200, os.path.basename(self.avatar_filename), self.nx, self.ny, + (self.nick.encode(ENCODING, 'replace'), 320, 200, os.path.basename(self.avatar_filename), self.nx, self.ny, size, self.bx, self.by)) def handle_close(self): @@ -239,7 +244,7 @@ class ServerConnection(transutil.Connection): def move(self, pos): x, y = pos - self.write('MOVE %s %d %d 20\r\n' % (self.nick, x, y)) + self.write('MOVE %s %d %d 20\r\n' % (self.nick.encode(ENCODING, 'replace'), x, y)) def push(self): self.write('PUSH 100\r\n') @@ -250,16 +255,16 @@ class ServerConnection(transutil.Connection): def privmsg(self, nicks, text): if type(nicks) == type(''): nicks = [nicks] for n in nicks: - self.write('PRIVMSG %s %s\r\n' % (n, text)) + self.write('PRIVMSG %s %s\r\n' % (n.encode(ENCODING, 'replace'), text)) return self.nick def url(self, nicks, url): if type(nicks) == type(''): nicks = [nicks] for n in nicks: - self.write('URL %s %s\r\n' % (n, url)) + 
self.write('URL %s %s\r\n' % (n.encode(ENCODING, 'replace'), url)) def chat(self, text): - self.write('CHAT %s\r\n' % text) + self.write('CHAT %s\r\n' % text.encode(ENCODING, 'replace')) def set_nick(self, nick): self.nick = nick @@ -331,11 +336,11 @@ class ServerConnection(transutil.Connection): def cmd_CHAT(self, line): cmd, nick, text = line.split(' ', 2) - self.client.chat(nick, text) + self.client.chat(decode(nick), decode(text)) def cmd_SCHAT(self, line): cmd, emote, nick, text = line.split(' ', 3) - self.client.chat(nick, '*%s* %s' % (emote, text)) + self.client.chat(nick, '*%s* %s' % (emote, decode(text))) def cmd_MOVE(self, line): cmd, nick, x, y, speed = line.split() @@ -360,6 +365,7 @@ class ServerConnection(transutil.Connection): def cmd_AVATAR(self, line): cmd, nick, filename, nx, ny, size, bx, by = line.split() + nick = decode(nick) nx = int(nx) ny = int(ny) size = int(size) @@ -379,10 +385,11 @@ class ServerConnection(transutil.Connection): def cmd_URL(self, line): cmd, nick, text = line.split(' ', 2) - self.client.url(nick, text) + self.client.url(decode(nick), text) def cmd_NEW(self, line): cmd, nick, x, y, filename, nx, ny, size, bx, by = line.split() + nick = decode(nick) x = int(x) y = int(y) nx = int(nx) @@ -399,11 +406,11 @@ class ServerConnection(transutil.Connection): def cmd_NOMORE(self, line): cmd, nick = line.split() - self.client.del_avatar(nick) + self.client.del_avatar(decode(nick)) def cmd_EXIT_OBJ(self, line): cmd, name, x1, y1, x2, y2, duration, host, port = line.split() - self.client.exit_obj(name, host, port) + self.client.exit_obj(decode(name), host, int(port)) def cmd_DCCGETAV(self, line): cmd, port, filename, size = line.split() @@ -427,10 +434,11 @@ class ServerConnection(transutil.Connection): def cmd_ROOMNAME(self, line): cmd, name = line.split(' ', 1) - self.client.set_title(name) + self.client.set_title(decode(name)) def cmd_MOUSEOVER(self, line): cmd, name, x, y, image1, size1, image2, size2, flag = line.split() + name = 
decode(name) x = int(x) y = int(y) size1 = int(size1) @@ -449,6 +457,8 @@ class ServerConnection(transutil.Connection): def cmd_WHOIS(self, line): cmd, nick, text = line.split(' ', 2) + nick = decode(nick) + text = decode(text) self.client.chat(nick, '*%s* is %s' % (nick, text)) def cmd_PUSH(self, line): diff --git a/froggirl.py b/froggirl.py @@ -21,7 +21,7 @@ import sys, os, string, re, asyncore, codecs, traceback, whrandom, time import OpenVerse, client -import webutil, kanjidic, microhal +import webutil, microhal #os.environ['WNHOME'] = '/usr/share/wordnet' #os.environ['WNSEARCHDIR'] = '/usr/share/wordnet' @@ -37,8 +37,31 @@ languages = {'e': 'en', 's': 'es', 'j': 'ja'} -fromutf8 = codecs.lookup('UTF8')[1] -fromlatin1 = codecs.lookup('ISO-8859-1')[1] +utf8_decode = codecs.lookup('UTF8')[1] +latin1_decode = codecs.lookup('ISO-8859-1')[1] +euc_jp_decode = codecs.lookup('japanese.euc-jp')[1] + +unicode_re = re.compile('#([0-9a-fA-F]{4})') +def fromlatin1(s): + """Translate a string from latin1 and convert "#xxxx" to unicode + characters""" + + l = unicode_re.split(latin1_decode(s)[0]) + for i in range(1, len(l), 2): l[i] = unichr(int(l[i], 16)) + + return u''.join(l) + +def fromutf8(s): return utf8_decode(s)[0] +def fromeuc(s): return euc_jp_decode(s)[0] + +def pipe(c, s): + """Pass a string to a pipe and return the result.""" + stdin, stdout = os.popen2(c) + stdin.write(s) + stdin.close() + output = stdout.read() + stdout.close() + return output class HalClient(client.Client): commands = [('([efigpsj])((2[efigpsj])+)\s+(.+)$', @@ -78,7 +101,7 @@ class HalClient(client.Client): # commands. 
def do_reply(self, sentence): - reply = self.brain.reply(sentence) + avg_logprob, reply = self.brain.reply(sentence) self.brain.train(sentence) return reply @@ -100,10 +123,13 @@ class HalClient(client.Client): def do_babel(self, lang1, langs, phrase): lang1 = languages[lang1] - r = fromlatin1(phrase)[0].encode('UTF8') + phrase = fromlatin1(phrase) + if lang1 == 'ja': + # Convert romaji to kana + phrase = fromeuc(pipe('./romkan.rb', phrase.encode('japanese.euc-jp', 'ignore'))) + r = phrase.encode('UTF8') for l in langs[1:].split('2'): lang2 = languages[l] - print 'Translating %s from %s to %s' % (r, lang1, lang2) try: r = webutil.translate(r, lang1, lang2) except: exception, info = sys.exc_info()[:2] @@ -114,7 +140,8 @@ class HalClient(client.Client): print r lang1 = lang2 - return kanjidic.romajify(fromutf8(r)[0]).encode('ISO-8859-1', 'replace') + #r = fromeuc(pipe('kakasi -Ea -Ha -Ka -Ja', fromutf8(r).encode('japanese.euc-jp', 'replace'))) + return fromutf8(r).encode('ISO-8859-1', 'replace') def do_google(self, phrase, groups=0): hits = webutil.google(phrase, groups) @@ -131,7 +158,7 @@ class HalClient(client.Client): return images = webutil.google_image(phrase) # Filter out banners. - images = filter(lambda im: float(im[1]) / float(im[2]) < 3, images) + images = filter(lambda im: float(im[1]) / float(im[2]) < 3 and im[1] >= 20 and im[2] >= 20, images) if not images: return 'Nothing found.' 
imageurl = whrandom.choice(images)[0] filename = os.path.expanduser('~/.OpenVerse/images/google.gif') @@ -141,9 +168,12 @@ class HalClient(client.Client): def do_random(self, phrase): if whrandom.randint(1, 5) == 1: - r = self.do_reply(phrase) - self.server.chat(r) - else: self.brain.train(phrase) + p, r = self.brain.reply(phrase) + print p, r + # Only send it if it's 'interesting' + if p > 5: self.server.chat(r) + + self.brain.train(phrase) def do_move(self, x, y): self.pos = (x, y) diff --git a/microhal.py b/microhal.py @@ -1,12 +1,412 @@ #!/usr/bin/python -import re, string, whrandom +import sys, re, string, whrandom from math import * from cPickle import dump, load +SWAPLIST = {"i": "you", + "me": "you", + "mine": "yours", + "my": "your", + "myself": "yourself", + "no": "yes", + "why": "because", + "yes": "no", + "you": "i", + "you": "me", + "your": "my", + "yours": "mine", + "yourself": "myself"} + +STOPLIST = [ + "s", + "t", + "a", + "ability", + "able", + "about", + "absolute", + "absolutely", + "across", + "actual", + "actually", + "after", + "afternoon", + "again", + "against", + "ago", + "agree", + "all", + "almost", + "along", + "already", + "although", + "always", + "am", + "an", + "and", + "another", + "any", + "anyhow", + "anything", + "anyway", + "are", + "aren", + "around", + "as", + "at", + "away", + "back", + "bad", + "be", + "been", + "before", + "behind", + "being", + "believe", + "belong", + "best", + "better", + "between", + "big", + "bigger", + "biggest", + "bit", + "both", + "buddy", + "but", + "by", + "call", + "called", + "calling", + "came", + "can", + "cannot", + "care", + "caring", + "case", + "catch", + "caught", + "certain", + "certainly", + "change", + "close", + "closer", + "come", + "coming", + "common", + "constant", + "constantly", + "could", + "current", + "day", + "days", + "derived", + "describe", + "describes", + "determine", + "determines", + "did", + "didn", + "do", + "does", + "doesn", + "doing", + "don", + "done", + 
"doubt", + "down", + "each", + "earlier", + "early", + "else", + "enjoy", + "especially", + "even", + "ever", + "every", + "everybody", + "everyone", + "everything", + "fact", + "fair", + "fairly", + "far", + "fellow", + "few", + "find", + "fine", + "for", + "form", + "found", + "from", + "full", + "further", + "gave", + "get", + "getting", + "give", + "given", + "giving", + "go", + "going", + "gone", + "good", + "got", + "gotten", + "great", + "had", + "has", + "hasn", + "have", + "haven", + "having", + "held", + "here", + "high", + "hold", + "holding", + "how", + "if", + "in", + "indeed", + "inside", + "instead", + "into", + "is", + "isn", + "it", + "it", + "its", + "just", + "keep", + "kind", + "knew", + "know", + "known", + "large", + "larger", + "largets", + "last", + "late", + "later", + "least", + "less", + "let", + "let", + "level", + "likes", + "little", + "long", + "longer", + "look", + "looked", + "looking", + "looks", + "low", + "made", + "make", + "making", + "many", + "mate", + "may", + "maybe", + "mean", + "meet", + "mention", + "mere", + "might", + "moment", + "more", + "morning", + "most", + "move", + "much", + "must", + "near", + "nearer", + "never", + "next", + "nice", + "nobody", + "none", + "noon", + "noone", + "not", + "note", + "nothing", + "now", + "obvious", + "of", + "off", + "on", + "once", + "only", + "onto", + "opinion", + "or", + "other", + "our", + "out", + "over", + "own", + "part", + "particular", + "particularly", + "perhaps", + "person", + "piece", + "place", + "pleasant", + "please", + "popular", + "prefer", + "pretty", + "put", + "quite", + "real", + "really", + "receive", + "received", + "recent", + "recently", + "related", + "result", + "resulting", + "results", + "said", + "same", + "saw", + "say", + "saying", + "see", + "seem", + "seemed", + "seems", + "seen", + "seldom", + "sense", + "set", + "several", + "shall", + "short", + "shorter", + "should", + "show", + "shows", + "simple", + "simply", + "small", + "so", + "some", + 
"someone", + "something", + "sometime", + "sometimes", + "somewhere", + "sort", + "sorts", + "spend", + "spent", + "still", + "stuff", + "such", + "suggest", + "suggestion", + "suppose", + "sure", + "surely", + "surround", + "surrounds", + "take", + "taken", + "taking", + "tell", + "than", + "thank", + "thanks", + "that", + "that", + "thats", + "the", + "their", + "them", + "then", + "there", + "therefore", + "these", + "they", + "thing", + "things", + "this", + "those", + "though", + "thoughts", + "thouroughly", + "through", + "tiny", + "to", + "today", + "together", + "told", + "tomorrow", + "too", + "total", + "totally", + "touch", + "try", + "twice", + "under", + "understand", + "understood", + "until", + "up", + "us", + "used", + "using", + "usually", + "various", + "very", + "want", + "wanted", + "wants", + "was", + "watch", + "way", + "ways", + "we", + "re", + "well", + "went", + "were", + "what", + "what", + "whatever", + "whats", + "when", + "where", + "where", + "which", + "while", + "whilst", + "who", + "who", + "whom", + "will", + "wish", + "with", + "within", + "wonder", + "wonderful", + "worse", + "worst", + "would", + "wrong", + "yesterday", + "yet"] + class Entry: - def __init__(self, key): - self.key = key + def __init__(self): self.freqs = {} self.count = 0 @@ -30,7 +430,10 @@ class Entry: n = whrandom.randint(1, self.count) for symbol, freq in self.freqs.items(): total = total + freq - if n <= total: return symbol, -log(float(freq)/float(self.count))/log(2) + if n <= total: return symbol + + def prob(self, symbol): + return float(self.freqs[symbol])/float(self.count) class Brain: @@ -38,8 +441,7 @@ class Brain: whitespace_re = re.compile('\s+') def __init__(self): - self.leaders = {} - self.followers = {} + self.markov = {} def parse(self, sentence): """Convert all whitespace to a single space, split up the sentence into alternating words and non-words, and remove any empty symbols.""" @@ -47,25 +449,19 @@ class Brain: def train(self, sentence): 
# Split up the sentence and add sentinels at the beginning and end - sentence = [None, None, None, None] + self.parse(sentence) + [None, None, None, None] - for i in range(len(sentence) - 4): - symbol, sym1, sym2, sym3, sym4 = sentence[i:i+5] + sentence = [None, None, None, None, None] + self.parse(sentence) + [None, None, None, None] + for i in range(len(sentence) - 5): + symbol, sym1, sym2, sym3, sym4, sym5 = sentence[i:i+6] - key = symbol, sym1, sym2, sym3 - try: entry = self.followers[key] + key = sym1, sym2, sym3, sym4 + try: leaders, followers = self.markov[key] except KeyError: - entry = Entry(key) - self.followers[key] = entry - - entry.add_symbol(sym4) - - key = sym1, sym2, sym3, sym4 - try: entry = self.leaders[key] - except KeyError: - entry = Entry(key) - self.leaders[key] = entry + leaders = Entry() + followers = Entry() + self.markov[key] = (leaders, followers) - entry.add_symbol(symbol) + leaders.add_symbol(symbol) + followers.add_symbol(sym5) def train_from_file(self, file): if type(file) == type(''): file = open(file) @@ -74,57 +470,65 @@ class Brain: if line[0] != '#': self.train(line) line = file.readline() - def generate_replies(self, keywords, n=10): - """Generate a random sentence from a keyword.""" - # Initial seed is the word preceded by randomly selected preceder symbols - # Pick one seed per keyword - seeds = [] - for keyword in keywords: - candidates = filter(lambda key, k=keyword: key[3] == k, self.followers.keys()) - if candidates: seeds.append(whrandom.choice(candidates)) - - if not seeds: - # Pick n random seeds - keys = self.followers.keys() - for i in range(n): seeds.append(whrandom.choice(keys)) - - replies = [] - for i in range(n): - seed = whrandom.choice(seeds) - replies.append(self.generate_sentence(list(seed))) - - return replies - def generate_sentence(self, sentence): """Destructively modifies the seed!""" - total_entropy = 0.0 - symbols = 0 while sentence[-1]: - symbol, entropy = 
self.followers[tuple(sentence[-4:])].choose() + symbol = self.markov[tuple(sentence[-4:])][1].choose() sentence.append(symbol) - total_entropy = total_entropy + entropy - symbols = symbols + 1 # Generate the beginning of the sentence. while sentence[0]: - symbol, entropy = self.leaders[tuple(sentence[:4])].choose() + symbol = self.markov[tuple(sentence[:4])][0].choose() sentence = [symbol] + sentence - total_entropy = total_entropy + entropy - symbols = symbols + 1 # Strip sentinels while not sentence[0]: del sentence[0] while not sentence[-1]: del sentence[-1] - return total_entropy / symbols, sentence + return sentence def reply(self, sentence): - # Pick only word symbols that are in the frequency dict - keywords = filter(lambda k: k[0] in string.letters, self.parse(sentence)) - # Generate 10 replies for now - candidates = self.generate_replies(keywords, 10) - candidates.sort() - return ''.join(candidates[-1][1]).capitalize() + keywords = [] + for k in self.parse(sentence): + if k[0] in string.letters and k not in \ + STOPLIST: + try: k = SWAPLIST[k] + except KeyError: pass + if k not in keywords: keywords.append(k) + + keys = self.markov.keys() + candidates = [] + for keyword in keywords: + seeds = filter(lambda s, k=keyword: s[3] == k, keys) + if seeds: + # generate 10 replies per keyword + for i in range(10): + candidate = self.generate_sentence(list(whrandom.choice(seeds))) + # Calculate the probability of each keywords' appearing + # in its position + # This is a total hack. + total_logprob = 0 + num_keywords = 0 + key = [None, None, None, None] + for symbol in candidate: + if symbol in keywords: + total_logprob = total_logprob - \ + log(self.markov[tuple(key)][1].prob(symbol)) + num_keywords = num_keywords + 1 + + del key[0] + key.append(symbol) + + # Insert them in random order. 
+ candidates.insert(whrandom.randint(0, len(candidates)), (total_logprob/num_keywords, candidate)) + + + if not candidates: + return 0.0, ''.join(self.generate_sentence(list(whrandom.choice(keys)))).capitalize() + + candidates.sort(lambda a, b: cmp(a[0], b[0])) + avg_logprob, sentence = candidates[-1] + return avg_logprob, ''.join(sentence).capitalize() def save(self, file): if type(file) == type(''): file = open(file, 'w') @@ -135,3 +539,15 @@ def load_brain(file): if type(file) == type(''): file = open(file, 'r') return load(file) + +def main(argv): + import microhal + b = microhal.Brain() + print >> sys.stderr, 'Training from %s...' % argv[1] + b.train_from_file(argv[1]) + print >> sys.stderr, 'Saving to %s...' % argv[2] + b.save(argv[2]) + print >> sys.stderr, 'Done.' + +if __name__ == '__main__': main(sys.argv) + diff --git a/pvui_pygame.py b/pvui_pygame.py @@ -1091,7 +1091,7 @@ class Client: except transutil.HandlerError, info: self.debug(info) return - self.server.chat(text.encode('utf-8')) + self.server.chat(text) else: self.redraw(self.entry.insert(event.unicode)) elif event.type == MOUSEBUTTONDOWN: if self.whichmouseover == '': @@ -1112,6 +1112,8 @@ class Client: # Transport-called functions + def push(self, x, y, speed): pass + def background_image(self, image): """Change the background""" self.background = image diff --git a/webutil.py b/webutil.py @@ -1,4 +1,4 @@ -import urllib, re, string, Image, os +import urllib, re, string, os from htmlentitydefs import entitydefs babel_regex = re.compile('name="q">([^<]+)</textarea>', re.MULTILINE) @@ -46,8 +46,10 @@ def metar(station): image_regex = re.compile('<img src=(/images\?q=tbn:\S+) width=(\d+) height=(\d+)') -def google_image(phrase): - data = urllib.urlopen('http://images.google.com/images?q=%s&imgsafe=off' % urllib.quote_plus(phrase)).read() +def google_image(phrase, safe=1): + if safe: imgsafe = 'on' + else: imgsafe = 'off' + data = urllib.urlopen('http://images.google.com/images?q=%s&imgsafe=%s' % 
(urllib.quote_plus(phrase), imgsafe)).read() images = [] while data: m = image_regex.search(data) @@ -57,9 +59,8 @@ def google_image(phrase): return images def image(url, filename): - localfile = urllib.urlretrieve(url)[0] - im = Image.open(localfile).convert('P', palette=Image.ADAPTIVE) - im.save(filename) + urllib.urlretrieve(url, 'google.jpg')[0] + os.spawnlp(os.P_WAIT, 'convert', 'convert', 'google.jpg', filename) urllib.urlcleanup()