docs: explain the difference between hashtable slot states EMPTY and UNUSED

PegasusWang · PegasusWang · commit 517461cbd124 · 2019-10-20T16:38:39.000+08:00
diff --git a/docs/07_哈希表/hashtable.py b/docs/07_哈希表/hashtable.py
@@ -28,7 +28,7 @@ def __iter__(self):
 
 
 class Slot(object):
-    """定义一个 hash 表 数组的槽
+    """定义一个 hash 表数组的槽(slot 这里指的就是数组的一个位置)
     注意，一个槽有三种状态，看你能否想明白。相比链接法解决冲突，二次探查法删除一个 key 的操作稍微复杂。
     1.从未使用 HashMap.UNUSED。此槽没有被使用和冲突过，查找时只要找到 UNUSED 就不用再继续探查了
     2.使用过但是 remove 了，此时是 HashMap.EMPTY，该探查点后边的元素扔可能是有key
@@ -60,10 +60,15 @@ def _hash(self, key):
         return abs(hash(key)) % len(self._table)
 
     def _find_key(self, key):
+        """
+        解释一个 slot 为 UNUSED 和 EMPTY 的区别
+        因为使用的是二次探查的方式，假如有两个元素 A、B 发生冲突：首先 A 经 hash 得到的 slot 下标是 5，于是 A 被放到了槽 5；之后插入 B 时 hash 同样得到槽 5，发生冲突，于是按照探查方式继续向后找，最终放到了槽 8。
+        然后删除 A，槽 5 被置为 EMPTY。此时再去查找 B，第一次 hash 得到的是槽 5；因为它是 EMPTY 而不是 UNUSED，说明后面仍可能有元素，所以必须继续探查下一个位置才能找到 B。但如果槽 5 是 UNUSED，就不用继续找了，可以直接认定 B 不存在。这就是 UNUSED 和 EMPTY 的区别。
+        """
         index = self._hash(key)
         _len = len(self._table)
         while self._table[index] is not HashTable.UNUSED:
-            if self._table[index] is HashTable.EMPTY:
+            if self._table[index] is HashTable.EMPTY:  # 注意如果是 EMPTY，继续寻找下一个槽
                 index = (index*5 + 1) % _len
                 continue
             elif self._table[index].key == key:
@@ -75,7 +80,7 @@ def _find_key(self, key):
     def _find_slot_for_insert(self, key):
         index = self._hash(key)
         _len = len(self._table)
-        while not self._slot_can_insert(index):
+        while not self._slot_can_insert(index): # 直到找到一个可以用的槽
             index = (index*5 + 1) % _len
         return index