5
5
6
6
class Array (object ):
7
7
8
- def __init__ (self , size = 32 ):
8
+ def __init__ (self , size = 32 , init = None ):
9
9
self ._size = size
10
- self ._items = [None ] * size
10
+ self ._items = [init ] * size
11
11
12
12
def __getitem__ (self , index ):
13
13
return self ._items [index ]
@@ -41,96 +41,88 @@ def __init__(self, key, value):
41
41
42
42
class HashTable (object ):
43
43
44
- UNUSED = None # 没被使用过的槽,作为该类变量的一个单例,下边都是is 判断
45
- EMPTY = Slot (None , None ) # 使用过但是被删除的槽
44
+ UNUSED = None # 没被使用过
45
+ EMPTY = Slot (None , None ) # 使用却被删除过
46
46
47
47
def __init__(self):
    """Create an empty table backed by an 8-slot Array of UNUSED markers."""
    # The capacity starts at 8 and is only ever doubled by _rehash(), so it
    # stays a power of two.
    self._table = Array(8, init=HashTable.UNUSED)
    # Number of live key/value entries (deleted slots are not counted).
    self.length = 0
50
50
51
51
@property
def _load_factor(self):
    """Return live entries divided by table capacity, as a float."""
    # add() rehashes once this value reaches 0.8.
    return self.length / float(len(self._table))
55
55
56
56
def __len__(self):
    """Return the number of live entries stored in the table."""
    return self.length
58
58
59
def _hash(self, key):
    """Return the first slot index probed for *key*.

    The index is ``abs(hash(key))`` reduced modulo the current table length.
    """
    return abs(hash(key)) % len(self._table)
62
61
63
def _find_key(self, key):
    """Return the index of the slot that stores *key*, or None if absent.

    Probing starts at ``self._hash(key)`` and advances with
    ``index = (index * 5 + 1) % len(table)``.  A deleted slot
    (``HashTable.EMPTY``) is stepped over; a never-used slot
    (``HashTable.UNUSED``) ends the search.

    The walk is capped at one probe per slot.  Deleted slots are not counted
    in ``length``, so repeated add/remove cycles can consume every UNUSED
    slot without triggering a rehash; an uncapped loop would then never
    terminate for a missing key.  The cap assumes the table length is a
    power of two (8, then doubled on rehash), for which this recurrence
    visits every index exactly once per cycle.
    """
    _len = len(self._table)
    index = self._hash(key)
    for _ in range(_len):
        slot = self._table[index]
        if slot is HashTable.UNUSED:
            return None
        if slot is not HashTable.EMPTY and slot.key == key:
            return index
        index = (index * 5 + 1) % _len
    return None
74
+
75
def _find_slot_for_insert(self, key):
    """Return the first index on *key*'s probe path that can take an entry.

    A slot qualifies when ``_slot_can_insert`` accepts it, i.e. it is either
    never used or previously deleted.
    """
    _len = len(self._table)
    index = self._hash(key)
    # Terminates as long as one insertable slot exists; add() rehashes at a
    # load factor of 0.8, before the table can fill with live entries.
    while not self._slot_can_insert(index):
        index = (index * 5 + 1) % _len
    return index
90
81
91
82
def _slot_can_insert(self, index):
    """Return True when the slot at *index* is EMPTY (deleted) or UNUSED."""
    slot = self._table[index]
    return slot is HashTable.EMPTY or slot is HashTable.UNUSED
93
84
94
def __contains__(self, key):
    """Support ``key in table``: True when ``_find_key`` locates *key*."""
    return self._find_key(key) is not None
97
88
98
89
def add(self, key, value):
    """Insert *key* with *value*, or overwrite the value of an existing key.

    Returns:
        False when *key* was already present and only its value was
        replaced; True when a new entry was inserted.
    """
    # One lookup answers both "is it present?" and "where?".
    index = self._find_key(key)
    if index is not None:
        self._table[index].value = value
        return False

    index = self._find_slot_for_insert(key)
    self._table[index] = Slot(key, value)
    self.length += 1
    # Grow once the table is 80% full of live entries.
    if self._load_factor >= 0.8:
        self._rehash()
    return True
110
101
111
102
def _rehash(self):
    """Double the table capacity and re-insert every live entry.

    Slots that are UNUSED or EMPTY (deleted) are not carried over, so the
    new table starts without tombstones.
    """
    old_table = self._table
    newsize = len(old_table) * 2
    self._table = Array(newsize, init=HashTable.UNUSED)

    # Recount while re-inserting; slot positions depend on the new length.
    self.length = 0
    for slot in old_table:
        if slot is HashTable.UNUSED or slot is HashTable.EMPTY:
            continue
        index = self._find_slot_for_insert(slot.key)
        self._table[index] = slot
        self.length += 1
123
114
124
115
def get(self, key, default=None):
    """Return the value stored for *key*, or *default* when it is absent."""
    index = self._find_key(key)
    if index is None:
        return default
    return self._table[index].value
130
121
131
122
def remove (self , key ):
132
- assert key in self , 'keyerror'
133
- index = self ._find_slot (key , for_insert = False )
123
+ index = self ._find_key (key )
124
+ if index is None :
125
+ raise KeyError ()
134
126
value = self ._table [index ].value
135
127
self .length -= 1
136
128
self ._table [index ] = HashTable .EMPTY
@@ -139,28 +131,34 @@ def remove(self, key):
139
131
def __iter__(self):
    """Yield the key of every live entry, in table-slot order."""
    for slot in self._table:
        # Identity checks, matching how the markers are tested elsewhere.
        if slot is HashTable.UNUSED or slot is HashTable.EMPTY:
            continue
        yield slot.key
143
135
144
136
145
137
def test_hash_table():
    """Exercise add, get, remove, iteration and growth of HashTable."""
    h = HashTable()
    h.add('a', 0)
    h.add('b', 1)
    h.add('c', 2)
    assert len(h) == 3
    assert h.get('a') == 0
    assert h.get('b') == 1
    assert h.get('hehe') is None

    h.remove('a')
    assert h.get('a') is None
    assert sorted(list(h)) == ['b', 'c']

    # n exceeds the initial capacity of 8, so rehashing is exercised too.
    n = 50
    for i in range(n):
        h.add(i, i)

    for i in range(n):
        assert h.get(i) == i
157
+
158
+
159
if __name__ == '__main__':
    # The test runs while the arguments are evaluated, i.e. before anything
    # is printed; its return value (None) is printed between the markers.
    print(
        'beg',
        test_hash_table(),
        'end',
    )
0 commit comments