misc_python

Sean's miscellaneous Python code that's not big enough for its own repo.
Log | Files | Refs | README | LICENSE

commit caa729e372bc6e14d8e486a84d28aad4de3c5a14
Author: Sean Lynch <seanl@literati.org>
Date:   Tue,  8 Jun 2010 09:25:34 -0700

Initial commit

Diffstat:
Abwt.py | 14++++++++++++++
Ahashtree.py | 249+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Amaze.py | 76++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Aotr.py | 25+++++++++++++++++++++++++
Arabin.py | 54++++++++++++++++++++++++++++++++++++++++++++++++++++++
Arocket.py | 60++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Asieve.py | 85+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Atest.py | 12++++++++++++
8 files changed, 575 insertions(+), 0 deletions(-)

diff --git a/bwt.py b/bwt.py @@ -0,0 +1,14 @@ +#!/usr/bin/python +import sys + +def main(): + data = sys.stdin.read() + def bwt_cmp(a, b): + return cmp(data[a+1:], data[b+1:]) + indexes = range(len(data) + 1) + indexes.sort(cmp=bwt_cmp) + print ''.join([data[i] if i < len(data) else '' for i in indexes]) + + +if __name__ == '__main__': + main() diff --git a/hashtree.py b/hashtree.py @@ -0,0 +1,249 @@ +#!/usr/bin/python +import sys +from md5 import md5 +from bz2 import BZ2File + +MODULUS = 2**31 - 1 +MULTIPLIER = 3467255 + + +class Block(object): + __slots__ = ('strong_hash', 'fast_hash') + hash_func = md5 + + def __init__(self, strong_hash, fast_hash): + self.strong_hash = strong_hash + self.fast_hash = fast_hash + + @classmethod + def compute(cls, data): + r""" + >>> b1 = Block.compute('0' * 32) + >>> b1.strong_hash + '\xcd\x9eE\x9e\xa7\x08\xa9H\xd5\xc2\xf5\xa6\xca\x888\xcf' + >>> b1.fast_hash + 526142487 + >>> b2 = Block.compute('1' * 32) + >>> b2.strong_hash + '\x1a\x80\xcb\xf2\x85\x9e\x01\x95Y6]k\xe4xyN' + >>> b2.fast_hash + 134450605 + + """ + + strong_hash = cls.hash_func(data).digest() + fast_hash = rolling_hash(data)[-1] + return cls(strong_hash, fast_hash) + + def combine(self, other, num_chars): + r""" + >>> b1 = Block.compute('0' * 32) + >>> b2 = Block.compute('1' * 32) + >>> b3 = b1.combine(b2, 32) + >>> b3.strong_hash + '{\xe4\xcc\x9c\xb0\xe9\xccJ\xd6g\xa4B\xf7\xd0\x075' + >>> b3.fast_hash + 724473450 + + """ + + strong_hash = self.hash_func(self.strong_hash + other.strong_hash).digest() + fast_hash = combine_hashes(self.fast_hash, other.fast_hash, num_chars) + return self.__class__(strong_hash, fast_hash) + + +class HashTree(object): + r""" + >>> tree = HashTree(2, 3, 1) + >>> for c in 'hello world': + ... tree.insert_block(Block.compute(c)) + ... + >>> [b[0].strong_hash for b in tree.blocks] + ['\x82w\xe0\x91\ru\x01\x95\xb4Hyv\x16\xe0\x91\xad', '@Cg\xd3\x1eJ=\xce\xa8r\xe0\x9c~i/\xd1'] + + """ + + def __init__(self, max_size, levels, blocksize): + self.blocks = [] + self.hashes = {} + self.counts = [0] * levels + for i in xrange(levels): + self.blocks.append([]) + + self.max_size = max_size + self.blocksize = blocksize + + def remove_first_blocks(self, level, n=1): + blocks = self.blocks[level] + hashes = self.hashes + for i in xrange(n): + key = (blocks[i].fast_hash, level) + l = hashes[key] + if len(l) == 1: + del hashes[key] + else: + del l[0] + + del blocks[:n] + + def insert_block(self, block, level=0): + blocks = self.blocks + blocks[level].append(block) + self.hashes.setdefault((block.fast_hash, level), []).append(self.counts[level]) + self.counts[level] += 1 + if len(blocks[level]) > self.max_size: + if level == len(self.blocks) - 1: + self.remove_first_blocks(level) + else: + new_block = blocks[level][0].combine(blocks[level][1], self.blocksize << level) + self.insert_block(new_block, level + 1) + self.remove_first_blocks(level, 2) + + def find_fast_hash(self, fast_hash, level): + blocks = self.blocks[level] + count = self.counts[level] - len(blocks) + return [ i - count for i in self.hashes.get((fast_hash, level), []) ] + + def check_block(self, candidates, level, data): + blocks = self.blocks[level] + strong_hash = tree_hash(data, self.blocksize)[-1][0] + for candidate in candidates: + if strong_hash == blocks[candidate].strong_hash: + return candidate + + +def rolling_hash(s, window_size=0, start_index=0, start=0): + """ + >>> h1 = rolling_hash('hello')[-1] + >>> h1 + 1964986979 + + >>> rolling_hash(' world', start=h1)[-1] + 1526861753 + + >>> rolling_hash('hello world')[-1] + 1526861753 + + """ + + assert start_index >= window_size or start == 0 + r = [] + for i in xrange(start_index, len(s)): + start = (start * MULTIPLIER + ord(s[i])) % MODULUS + if window_size > 0 and i >= window_size: + start = (start - ord(s[i-window_size]) * pow(MULTIPLIER, window_size, MODULUS)) % MODULUS + + r.append(start) + + return r + + +def combine_hashes(h1, h2, num_chars_in_h2): + """ + >>> h1 = rolling_hash('hello')[-1] + >>> h2 = rolling_hash(' world')[-1] + >>> combine_hashes(h1, h2, 6) + 1526861753 + + """ + + for i in xrange(num_chars_in_h2): + h1 = h1 * MULTIPLIER % MODULUS + + return (h1 + h2) % MODULUS + + +def tree_hash(s, block_size, hash_func=md5): + r""" + >>> tree_hash('hellowld', 1) + [['%\x10\xc3\x90\x11\xc5\xbepA\x82B>:i^\x91', '\xe1g\x17\x97\xc5.\x15\xf7c8\x0bE\xe8A\xec2', '-\xb9^\x8e\x1a\x92g\xb7\xa1\x18\x85V\xb2\x01;3', '-\xb9^\x8e\x1a\x92g\xb7\xa1\x18\x85V\xb2\x01;3', '\xd9Vyu!4\xa2\xd9\xeba\xdb\xd7\xb9\x1cK\xcc', "\xf1)\x01\x86\xa5\xd0\xb1\xce\xab'\xf4\xe7|\x0c]h", '-\xb9^\x8e\x1a\x92g\xb7\xa1\x18\x85V\xb2\x01;3', '\x82w\xe0\x91\ru\x01\x95\xb4Hyv\x16\xe0\x91\xad'], ['\x84\x97\xf5\xe5\x84\xf6\x10u\x19\xaf\xa2Bz77/', '\r\xd6\xeb\xd9R>\xe4\x12sjJ+\xf78\x15\xd0', '\xbe.+\xd2t\x91`\xce\x81\x95mWd\x89%\x94', '.\xda\xa3N,rpy\xfd\xab?\xcde\xa9qu'], ['\xca\x95\x89\xdc\x00}|\xfe\xfd\xfb\xb3\xa2\x95N6M', '\x84\x96\r\xef\xb5\\\xff1t\x11%\x9as\xecL@'], ['\xa8\x0c\xa3\xce\xef\xe3\x18\x9a\xbb\xa8s\xdd\x0fz\xc2\x1a']] + + """ + + hashes = [[hash_func(s[i:i+block_size]).digest() for i in xrange(0, len(s), block_size)]] + while len(hashes[-1]) > 1: + h = hashes[-1] + hashes.append([hash_func(h[i] + h[i+1]).digest() for i in xrange(0, len(h), 2)]) + + return hashes + + +class Compressor(object): + def __init__(self, levels, blocksize, outfn): + self.levels = levels + self.blocksize = blocksize + self.buf = '' + self.hashes = [0] * levels + self.tree = HashTree(3276800, levels, blocksize) + self.outfp = BZ2File(outfn, 'w') + + def add_data(self, data): + pos = len(self.buf) + self.buf += data + buf = self.buf + levels = self.levels + hashes = [] + blocksize = self.blocksize + for level in xrange(len(self.hashes)): + h = rolling_hash(buf, window_size=blocksize<<level, start_index=pos, start=self.hashes[level]) + hashes.append(h) + if len(h) > 0: + self.hashes[level] = h[-1] + + tree = self.tree + + next_block = blocksize + i = 0 + outfp = self.outfp + while i < len(hashes[0]): + for level in xrange(levels-1, -1, -1): + hash_list = hashes[level] + h = hash_list[i] + candidate_blocks = tree.find_fast_hash(h, level) + if candidate_blocks: + p = i - (blocksize << level) + 1 + candidate_data = buf[p+pos:p+pos+(blocksize<<level)] + match = tree.check_block(candidate_blocks, level, candidate_data) + if match is None: + print 'miss %r %r' % (candidate_blocks, candidate_data) + else: + print 'hit:', match + i += blocksize << level + break + else: + outfp.write(buf[i+pos]) + i += 1 + + while i >= next_block: + # Insert data up to this point + tree.insert_block(Block.compute(buf[next_block-blocksize:next_block])) + next_block += blocksize + + self.buf = buf[-blocksize<<self.levels:] + + def close(self): + self.outfp.close() + + +def _test(): + import doctest + doctest.testmod() + + +def main(): + import sys + + comp = Compressor(4, 64, sys.argv[1]) + + data = sys.stdin.read(8192) + while data: + comp.add_data(data) + data = sys.stdin.read(8192) + + comp.close() + + +if __name__ == '__main__': + #_test() + main() + diff --git a/maze.py b/maze.py @@ -0,0 +1,76 @@ +"""Create a maze""" + +import random + + +def maze(w, h): + cells = [None] + for i in range(1,w+1): + row = [None] + for j in range(1,h+1): row.append([(i, j)]) + cells.append(row) + + walls = [] + for i in range(1,w): + for j in range(1,h): + walls.append((i,j,0)) + walls.append((i,j,1)) + + for i in range(1,w): + walls.append((i, h, 0)) + + for j in range(1,h): + walls.append((w, j, 1)) + + # Shuffle the list of walls + for i in range(len(walls)): + n = random.randint(i, len(walls)-1) + temp = walls[i] + walls[i] = walls[n] + walls[n] = temp + + upwalls = [] + for x, y, v in walls: + set = cells[x][y] + if v: + y1 = y+1 + x1 = x + else: + y1 = y + x1 = x+1 + + set1 = cells[x1][y1] + if set is set1: # Sets will be identical if they're in the same set + #print "Not knocking down wall" + upwalls.append((x, y, v)) + else: + # Concatenate the sets + set.extend(set1) + for i, j in set1: cells[i][j] = set + #print set + + # Add upper wall + for i in range(1,w+1): + upwalls.append((i, 0, 1)) + + # Add left hand wall + for j in range(1,h+1): + upwalls.append((0, j, 0)) + + # Add right hand wall + for j in range(1,h+1): + upwalls.append((w, j, 0)) + + # Add lower wall + for i in range(1,w+1): + upwalls.append((i, h, 1)) + + return upwalls + + +def main(): + print maze(10, 10) + + +if __name__ == '__main__': + main() diff --git a/otr.py b/otr.py @@ -0,0 +1,25 @@ +#!/usr/bin/python + +from ctypes import * + +_o = cdll.LoadLibrary('/usr/lib/libotr.so.2') +_o.otrl_init(3, 0, 0) + +class UserState(object): + def __init__(self): + self._p = _o.otrl_userstate_create() + + def __del__(self): + _o.otrl_userstate_free(self._p) + + def privkey_read(self, filename): + _o.otrl_privkey_read(filename) + + def privkey_read_fingerprints(self, filename): + _o.otrl_privkey_read_fingerprints(self._p, filename, None, None) + + def :wq + + + + diff --git a/rabin.py b/rabin.py @@ -0,0 +1,54 @@ +#!/usr/bin/python + +def bin(x, n=32): + s = '' + for i in xrange(32): + s = str(x & 0x1) + s + x >>= 1 + + return s + + +class Rabin(object): + def __init__(self, p=0xab59b4d1): + self.p = p + self.table = self.calculate_t8_table() + #self.window_table = self.calculate_window_table(16) + + def calculate_t8_table(self): + # This table is the result of multiplying by t**8 given the high-order 8 bits + return [ self.table_entry(i) for i in xrange(256) ] + + def calculate_window_table(self, window_size): + return [ self.multiply_repeatedly_by_t8(i, window_size) for i in xrange(256) ] + + def multiply_repeatedly_by_t8(self, x, n): + for i in xrange(n): + x = self.multiply_by_t8(x) + + return x + + def table_entry(self, i): + x = i << 24 + for j in xrange(8): + x = self.multiply_by_t(x) + print '%d\t%d\t%08x\t%s' % (i, j, x, bin(x)) + + return x + + def multiply_by_t8(self, x): + return ((x & 0xffffff) << 8) ^ self.table[x >> 24] + + def multiply_by_t(self, x): + if x & 0x80000000: + return ((x & 0x7fffffff) << 1) ^ self.p + else: + return x << 1 + + def fingerprint(self, fingerprint, s): + for c in s: + fingerprint = self.multiply_by_t8(fingerprint) ^ ord(c) + + return fingerprint + + diff --git a/rocket.py b/rocket.py @@ -0,0 +1,60 @@ +#!/usr/bin/python + +k1 = 0.190263 +k2 = 8.417286e-5 +E = 6369000L +L = 6.5 +g = 9.80665 +P0 = 101325 +T0 = 288.15 +R = 8.31432 +M = 28.9644 +Rd = 287.04 +Rv = 461.495 + +def actualPressure(AS, H): + """Actual pressure in mb from altimeter setting and geopotential elevation""" + return (AS**k1 - k2*H)**(1.0/k1) + +def vaporPressure(T): + """Vapor pressure in mb from dew point in K""" + eso=6.1078 + c0=0.99999683 + c1=-0.90826951e-2 + c2=0.78736169e-4 + c3=-0.61117958e-6 + c4=0.43884187*10-8 + c5=-0.29883885*10-10 + c6=0.21874425*10-12 + c7=-0.17892321*10-14 + c8=0.11112018*10-16 + c9=-0.30994571*10-19 + return eso*(c0+T*(c1+T*(c2+T*(c3+T*(c4+T*(c5+T*(c6+T*(c7+T*(c8+T*(c9))))))))))**(-8) + +def geometricAltitude(H): + """Geometric altitude from geopotential altitude""" + return (E*H)/(E-H) + +def geopotentialAltitude(Z): + return (E*Z)/(E+Z) + +#def densityAltitude(D): +# """Density altitude from density in kg/km^3""" +# return 44.3308 - 8.33851 * (D**0.324969) + +#def pressureAltitude(P): +# """Pressure altitude from actual pressure in pascals""" +# return 44.3308 - 4.94654 * (P**0.190263) + +def density(Pd, Pv, T): + """Density from dry air pressure in Pascals, vapor pressure in Pascals, and temperature in K""" + return (Pd / Rd * T) + (Pv + Rv * T) + +def airDensity(Z, AS, T, DP): + """Density from geometric altitude in meters, altimeter setting in millibars, temperature in K, and dew point in K""" + H = geopotentialAltitude(Z) + P = actualPressure(AS, H) + Pv = vaporPressure(DP) + Pd = P - Pv + return density(Pd, Pv, T) + diff --git a/sieve.py b/sieve.py @@ -0,0 +1,85 @@ +#!/usr/bin/python +import heapq, random + +def multiples(n): + i = n * n + n *= 2 + while True: + yield i + i += n + + +def sieve(): + yield 2 + n = 3 + composites = [] + while True: + if composites and composites[0][0] == n: + # Composite + while composites[0][0] == n: + iter = composites[0][1] + heapq.heapreplace(composites, (iter.next(), iter)) + else: + # Prime + iter = multiples(n) + heapq.heappush(composites, (iter.next(), iter)) + yield n + + n += 2 + + +def miller_rabin(n, k): + if n == 2: + return True + elif n < 2: + raise ValueError, 'n must be >= 2' + + d = n - 1 + s = 0 + while d & 1 == 0: + d >>= 1 + s += 1 + + for i in xrange(k): + a = random.randint(2, n-1) + x = pow(a, d, n) + if x == 1 or x == n - 1: + continue + + for r in xrange(1, s): + x = pow(x, 2, n) + if x == 1: + return False + elif x == n - 1: + break + else: + return False + + return True + + +def main(): + primes = [] + s = sieve() + for i in xrange(10000): + n = s.next() + print n + primes.append(n) + assert miller_rabin(n, 20) + + for i in xrange(10000): + n = random.randint(2, primes[-1]) + if miller_rabin(n, 20): + print n + assert n in primes + else: + assert n not in primes + + + + + +if __name__ == '__main__': + main() + + diff --git a/test.py b/test.py @@ -0,0 +1,12 @@ +h1 = rolling_hash('hello')[-1] +h1 +h2 = rolling_hash(' world', start=h1)[-1] +h2 +h3 = rolling_hash('hello world')[-1] +h3 +h4 = rolling_hash(' world')[-1] +h4 +h5 = combine_hashes(h1, h4, 6) +h5 + +