Skip to content

Commit 93f0c59

Browse files
committed
hashtable
1 parent cb06bd8 commit 93f0c59

File tree

1 file changed

+155
-0
lines changed

1 file changed

+155
-0
lines changed

ehco/07/hashtable.py

Lines changed: 155 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
class Array(object):
    """Fixed-size sequence backed by a Python list.

    The array is created with ``size`` slots, each initialised to ``init``.
    Items are read and written by index; the length never changes after
    construction.
    """

    def __init__(self, size=32, init=None):
        """Create an array of ``size`` slots, each holding ``init``."""
        self._size = size
        self._items = [init] * size

    def __getitem__(self, index):
        """Return the item stored at ``index``."""
        return self._items[index]

    def __setitem__(self, index, value):
        """Store ``value`` at ``index``."""
        self._items[index] = value

    def __len__(self):
        """Return the number of slots the array was created with."""
        return self._size

    def clear(self, value=None):
        """Overwrite every slot with ``value`` (``None`` by default).

        The backing list is updated in place, so its identity is preserved.
        """
        # The previous implementation called range() on the list itself,
        # which raises TypeError; fill by the list's length instead.
        self._items[:] = [value] * len(self._items)

    def __iter__(self):
        """Yield the stored items in index order."""
        for item in self._items:
            yield item
23+
24+
25+
class Slot(object):
    """One slot of the hash table's backing array.

    A table position can be in one of three states. Compared with resolving
    collisions by chaining, deleting a key under open-address probing is
    slightly more involved, which is why the states are distinguished:

    1. Never used (HashTable.UNUSED). The position has never held an entry
       or taken part in a collision, so a lookup can stop probing as soon
       as it reaches one.
    2. Used and then removed (HashTable.EMPTY). Positions later in the probe
       sequence may still hold keys, so a lookup must keep probing.
    3. In use: the position holds a Slot instance carrying a key and value.
    """

    def __init__(self, key, value):
        """Store the ``key`` / ``value`` pair on the slot."""
        self.key = key
        self.value = value
36+
37+
38+
class HashTable(object):
    """Hash map using open addressing with tombstones.

    Entries live in a fixed-size ``Array`` of ``Slot`` objects. Collisions
    are resolved by probing with the recurrence
    ``index = (index * 5 + 1) % table_size``. The table starts with 8 slots
    and doubles whenever the load factor reaches 0.8.
    """

    # Marker for a position that has never held an entry.
    UNUSED = None
    # Marker (tombstone) for a position whose entry was removed.
    EMPTY = Slot(None, None)

    def __init__(self):
        """Create an empty table with 8 slots."""
        self._table = Array(8, init=HashTable.UNUSED)
        self.length = 0

    @property
    def _load_factor(self):
        """Ratio of live entries to table size; the table grows at >= 0.8."""
        return self.length / float(len(self._table))

    def __len__(self):
        """Return the number of live entries."""
        return self.length

    def _hash(self, key):
        """Map ``key`` to its home index in the current table."""
        return abs(hash(key)) % len(self._table)

    def _find_key(self, key):
        """Return the index holding ``key``, or ``None`` if it is absent."""
        index = self._hash(key)
        _len = len(self._table)
        # Tombstones are not counted by the load factor, so after enough
        # add/remove cycles the table may contain no UNUSED position at all.
        # Bound the probe to one full cycle so a lookup of a missing key
        # always terminates. With a power-of-two table size (8, doubled on
        # growth) the recurrence visits every position once per cycle.
        for _ in range(_len):
            slot = self._table[index]
            if slot is HashTable.UNUSED:
                # Never-used position: the key cannot be further along.
                return None
            if slot is not HashTable.EMPTY and slot.key == key:
                return index
            index = (index * 5 + 1) % _len
        return None

    def _find_slot_for_insert(self, key):
        """Return the first reusable (UNUSED or EMPTY) index for ``key``."""
        index = self._hash(key)
        _len = len(self._table)
        # Terminates because the load factor is kept below 0.8, so at least
        # one position is always free of a live entry.
        while not self._slot_can_insert(index):
            index = (index * 5 + 1) % _len
        return index

    def _slot_can_insert(self, index):
        """Return True when the position at ``index`` holds no live entry."""
        slot = self._table[index]
        return slot is HashTable.EMPTY or slot is HashTable.UNUSED

    def __contains__(self, key):
        """Return True when ``key`` is stored in the table."""
        return self._find_key(key) is not None

    def add(self, key, value):
        """Insert or update ``key``.

        Returns:
            ``True`` if a new entry was inserted, ``False`` if an existing
            entry's value was overwritten.
        """
        # A single lookup serves both the membership check and the update.
        index = self._find_key(key)
        if index is not None:
            self._table[index].value = value
            return False

        index = self._find_slot_for_insert(key)
        self._table[index] = Slot(key, value)
        self.length += 1
        if self._load_factor >= 0.8:
            self._rehash()
        return True

    def _rehash(self):
        """Double the table and re-insert every live entry.

        Tombstones are dropped because only live slots are copied over.
        """
        old_table = self._table
        newsize = len(self._table) * 2
        self._table = Array(newsize, HashTable.UNUSED)
        self.length = 0

        for slot in old_table:
            if slot is HashTable.UNUSED or slot is HashTable.EMPTY:
                continue
            index = self._find_slot_for_insert(slot.key)
            self._table[index] = slot
            self.length += 1

    def get(self, key, default=None):
        """Return the value stored for ``key``, or ``default`` if absent."""
        index = self._find_key(key)
        if index is None:
            return default
        return self._table[index].value

    def remove(self, key):
        """Delete ``key`` and return its value.

        Raises:
            KeyError: if ``key`` is not in the table.
        """
        index = self._find_key(key)
        if index is None:
            raise KeyError(key)
        value = self._table[index].value
        self.length -= 1
        # Leave a tombstone so probes for keys stored further along the
        # sequence do not stop early at this position.
        self._table[index] = HashTable.EMPTY
        return value

    def __iter__(self):
        """Yield the keys of all live entries in table order."""
        for slot in self._table:
            if slot is not HashTable.EMPTY and slot is not HashTable.UNUSED:
                yield slot.key
132+
133+
134+
def test_hash_table():
    """Exercise add, get, remove, iteration and growth of HashTable."""
    table = HashTable()

    # Three string keys mapped to 0, 1, 2.
    for value, key in enumerate('abc'):
        table.add(key, value)

    assert len(table) == 3
    assert table.get('a') == 0
    assert table.get('b') == 1
    assert table.get('hehe') is None

    # A removed key is no longer found and no longer iterated.
    table.remove('a')
    assert table.get('a') is None
    assert sorted(table) == ['b', 'c']

    # Insert enough integer keys to force the table to grow several times.
    count = 50
    for number in range(count):
        table.add(number, number)

    for number in range(count):
        assert table.get(number) == number

0 commit comments

Comments
 (0)