[Python-Dev] The Dictionary Gem is polished!
Christian Tismer
tismer@tismer.com
Sun, 17 Dec 2000 20:10:07 +0200
This is a multi-part message in MIME format.
--------------D1825E07B23FE5AC1D48DB49
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit
Christian Tismer wrote:
...
(my timings)
Attached is the updated script with the timings mentioned
in the last posting. Sorry, I posted an older version before.
--
Christian Tismer :^) <mailto:tismer@tismer.com>
Mission Impossible 5oftware : Have a break! Take a ride on Python's
Kaunstr. 26 : *Starship* http://starship.python.net
14163 Berlin : PGP key -> http://wwwkeys.pgp.net
PGP Fingerprint E182 71C7 1A9D 66E9 9D15 D3CC D4D7 93E2 1FAE F6DF
where do you want to jump today? http://www.stackless.com
--------------D1825E07B23FE5AC1D48DB49
Content-Type: text/plain; charset=us-ascii;
name="dictest.py"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
filename="dictest.py"
## dictest.py
## Test of a new rehash algorithm
## Chris Tismer
## 2000-12-17
## Mission Impossible 5oftware Team
# The following is a partial re-implementation of
# Python dictionaries in Python.
# The original algorithm was literally turned
# into Python code.
##/*
##Table of irreducible polynomials to efficiently cycle through
##GF(2^n)-{0}, 2<=n<=30.
##*/
polys = [
4 + 3,
8 + 3,
16 + 3,
32 + 5,
64 + 3,
128 + 3,
256 + 29,
512 + 17,
1024 + 9,
2048 + 5,
4096 + 83,
8192 + 27,
16384 + 43,
32768 + 3,
65536 + 45,
131072 + 9,
262144 + 39,
524288 + 39,
1048576 + 9,
2097152 + 5,
4194304 + 3,
8388608 + 33,
16777216 + 27,
33554432 + 9,
67108864 + 71,
134217728 + 39,
268435456 + 9,
536870912 + 5,
1073741824 + 83,
0
]
class NULL: pass
class Dictionary:
dummy = "<dummy key>"
def __init__(mp, newalg=0):
mp.ma_size = 0
mp.ma_poly = 0
mp.ma_table = []
mp.ma_fill = 0
mp.ma_used = 0
mp.oldalg = not newalg
def lookdict(mp, key, _hash):
me_hash, me_key, me_value = range(3) # rec slots
dummy = mp.dummy
mask = mp.ma_size-1
ep0 = mp.ma_table
i = (~_hash) & mask
ep = ep0[i]
if ep[me_key] is NULL or ep[me_key] == key:
return ep
if ep[me_key] == dummy:
freeslot = ep
else:
if (ep[me_hash] == _hash and
cmp(ep[me_key], key) == 0) :
return ep
freeslot = NULL
###### FROM HERE
if mp.oldalg:
incr = (_hash ^ (_hash >> 3)) & mask
else:
# note that we do not mask!
# even the shifting my not be worth it.
incr = _hash ^ (_hash >> 3)
###### TO HERE
if (not incr):
incr = mask
while 1:
ep = ep0[(i+incr)&mask]
if (ep[me_key] is NULL) :
if (freeslot != NULL) :
return freeslot
else:
return ep
if (ep[me_key] == dummy) :
if (freeslot == NULL):
freeslot = ep
elif (ep[me_key] == key or
(ep[me_hash] == _hash and
cmp(ep[me_key], key) == 0)) :
return ep
# Cycle through GF(2^n)-{0}
###### FROM HERE
if mp.oldalg:
incr = incr << 1
if (incr > mask):
incr = incr ^ mp.ma_poly
else:
# new algorithm: do a division
if (incr & 1):
incr = incr ^ mp.ma_poly
incr = incr >> 1
###### TO HERE
def insertdict(mp, key, _hash, value):
me_hash, me_key, me_value = range(3) # rec slots
ep = mp.lookdict(key, _hash)
if (ep[me_value] is not NULL) :
old_value = ep[me_value]
ep[me_value] = value
else :
if (ep[me_key] is NULL):
mp.ma_fill=mp.ma_fill+1
ep[me_key] = key
ep[me_hash] = _hash
ep[me_value] = value
mp.ma_used = mp.ma_used+1
def dictresize(mp, minused):
me_hash, me_key, me_value = range(3) # rec slots
oldsize = mp.ma_size
oldtable = mp.ma_table
MINSIZE = 4
newsize = MINSIZE
for i in range(len(polys)):
if (newsize > minused) :
newpoly = polys[i]
break
newsize = newsize << 1
else:
return -1
_nullentry = range(3)
_nullentry[me_hash] = 0
_nullentry[me_key] = NULL
_nullentry[me_value] = NULL
newtable = map(lambda x,y=_nullentry:y[:], range(newsize))
mp.ma_size = newsize
mp.ma_poly = newpoly
mp.ma_table = newtable
mp.ma_fill = 0
mp.ma_used = 0
for ep in oldtable:
if (ep[me_value] is not NULL):
mp.insertdict(ep[me_key],ep[me_hash],ep[me_value])
return 0
# PyDict_GetItem
def __getitem__(op, key):
me_hash, me_key, me_value = range(3) # rec slots
if not op.ma_table:
raise KeyError, key
_hash = hash(key)
return op.lookdict(key, _hash)[me_value]
# PyDict_SetItem
def __setitem__(op, key, value):
mp = op
_hash = hash(key)
## /* if fill >= 2/3 size, double in size */
if (mp.ma_fill*3 >= mp.ma_size*2) :
if (mp.dictresize(mp.ma_used*2) != 0):
if (mp.ma_fill+1 > mp.ma_size):
raise MemoryError
mp.insertdict(key, _hash, value)
# more interface functions
def keys(self):
me_hash, me_key, me_value = range(3) # rec slots
res = []
for _hash, _key, _value in self.ma_table:
if _value is not NULL:
res.append( _key)
return res
def values(self):
me_hash, me_key, me_value = range(3) # rec slots
res = []
for _hash, _key, _value in self.ma_table:
if _value is not NULL:
res.append( _value)
return res
def items(self):
me_hash, me_key, me_value = range(3) # rec slots
res = []
for _hash, _key, _value in self.ma_table:
if _value is not NULL:
res.append( (_key, _value) )
return res
def __cmp__(self, other):
mine = self.items()
others = other.items()
mine.sort()
others.sort()
return cmp(mine, others)
######################################################
## tests
def timing(func, args=None, n=1, **keywords) :
import time
time=time.time
appl=apply
if args is None: args = ()
if type(args) != type(()) : args=(args,)
rep=range(n)
dummyarg = ("",)
dummykw = {}
dummyfunc = len
if keywords:
before=time()
for i in rep: res=appl(dummyfunc, dummyarg, dummykw)
empty = time()-before
before=time()
for i in rep: res=appl(func, args, keywords)
else:
before=time()
for i in rep: res=appl(dummyfunc, dummyarg)
empty = time()-before
before=time()
for i in rep: res=appl(func, args)
after = time()
return round(after-before-empty,4), res
def test(lis, dic):
for key in lis: dic[key]
def nulltest(lis, dic):
for key in lis: dic
def string_dicts():
d1 = Dictionary() # original
d2 = Dictionary(1) # other rehash
for i in range(1000):
s = str(i) * 5
d1[s] = d2[s] = i
return d1, d2
def badnum_dicts():
d1 = Dictionary() # original
d2 = Dictionary(1) # other rehash
shift = 10
if EXTREME:
shift = 16
for i in range(1000):
bad = i << 16
d1[bad] = d2[bad] = i
return d1, d2
def do_test(dict, keys, n):
t0 = timing(nulltest, (keys, dict), n)[0]
t1 = timing(test, (keys, dict), n)[0]
return t1-t0
EXTREME=1
if __name__ == "__main__":
sdold, sdnew = string_dicts()
bdold, bdnew = badnum_dicts()
print "timing for strings old=%.3f new=%.3f" % (
do_test(sdold, sdold.keys(), 100),
do_test(sdnew, sdnew.keys(), 100) )
print "timing for bad integers old=%.3f new=%.3f" % (
do_test(bdold, bdold.keys(), 10) *10,
do_test(bdnew, bdnew.keys(), 10) *10)
"""
Results with a shift of 10 (EXTREME=0):
D:\crml_doc\platf\py>python dictest.py
timing for strings old=5.097 new=5.088
timing for bad integers old=101.540 new=12.610
Results with a shift of 16 (EXTREME=1):
D:\crml_doc\platf\py>python dictest.py
timing for strings old=5.218 new=5.147
timing for bad integers old=571.210 new=19.220
"""
--------------D1825E07B23FE5AC1D48DB49--