Skip to content

Commit 49962d5

Browse files
committed
add topk.py
1 parent 2358ee6 commit 49962d5

File tree

1 file changed

+43
-0
lines changed

1 file changed

+43
-0
lines changed

docs/15_堆与堆排序/topk.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import heapq
2+
3+
4+
class TopK:
    """Select the k largest elements of an iterable using bounded memory.

    Approach:
    1. Seed a min-heap with the first k elements.
    2. For every remaining element:
       - if it is smaller than the heap top, skip it (it cannot be one of
         the k largest);
       - otherwise replace the heap top with it and restore the heap.

    The heap never grows beyond k elements, so memory use does not depend on
    the length of the input.
    """

    def __init__(self, iterable, k):
        """Remember the source and how many elements to keep.

        Args:
            iterable: Source of mutually comparable elements. It is not read
                until get_topk() is called.
            k: Maximum number of elements to retain. A value <= 0 retains
                nothing.
        """
        self.minheap = []
        self.capacity = k
        self.iterable = iterable
        # Set once get_topk() has fed the iterable into the heap.
        self._consumed = False

    def push(self, val):
        """Offer a single value, keeping at most `capacity` largest values.

        Args:
            val: Element to consider; must be comparable with the elements
                already in the heap.
        """
        if self.capacity <= 0:
            # Nothing can be kept. Without this guard the code below would
            # index an empty heap and raise IndexError.
            return
        if len(self.minheap) < self.capacity:
            # The first k elements go straight into the heap.
            heapq.heappush(self.minheap, val)
            return
        if val < self.minheap[0]:
            # Smaller than the current k-th largest: cannot be in the top k.
            return
        # Pop the smallest retained value and push val in one step.
        heapq.heapreplace(self.minheap, val)

    def get_topk(self):
        """Return the k largest elements seen so far.

        The source iterable is consumed on the first call only; later calls
        return the same heap without feeding the source in again (doing so
        with a re-iterable source such as a list would insert every element
        a second time and corrupt the result).

        Returns:
            The internal min-heap list: at most `capacity` elements in heap
            order (smallest first, otherwise not sorted).
        """
        if not self._consumed:
            # Mark first so a retry after a failure cannot double-feed.
            self._consumed = True
            for val in self.iterable:
                self.push(val)
        return self.minheap
32+
33+
34+
def test():
    """Self-check: the 10 largest of a shuffled 0..999 must be 990..999."""
    import random

    numbers = list(range(1000))  # any iterable works here, which saves memory
    random.shuffle(numbers)
    selector = TopK(numbers, 10)
    result = selector.get_topk()
    # Heap order, e.g. [990, 991, 992, 996, 994, 993, 997, 998, 999, 995]
    print(result)
    # The heap is not sorted, so compare order-independently.
    assert sorted(result) == list(range(990, 1000))
40+
41+
42+
# Run the self-check only when this file is executed as a script.
if __name__ == "__main__":
    test()

0 commit comments

Comments
 (0)