Skip to content

Commit c916cc9

Browse files
committed
Preindex hashtag searches
1 parent c61d72a commit c916cc9

File tree

2 files changed

+14
-2
lines changed

2 files changed

+14
-2
lines changed

pubsub/pubsub.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,12 @@
1313
from heapq import merge
1414
from bisect import bisect
1515
from sys import intern
16+
import re
1617

1718
# Type aliases used by the annotations and type comments in this module.
User = str
1819
Timestamp = float
1920
HashAndSalt = Tuple[bytes, bytes]
21+
HashTag = str
2022

2123
class Post(NamedTuple):
2224
timestamp: float
@@ -32,16 +34,21 @@ class UserInfo(NamedTuple):
3234

3335
# Module-level, in-memory stores shared by the functions in this module.
posts = deque() # type: Deque[Post] # Posts from newest to oldest
3436
user_posts = defaultdict(deque) # type: DefaultDict[User, Deque[Post]]
37+
# Posts grouped by hashtag; post_message() pushes each new post onto the
# front of the deque of every hashtag found in its text.
hashtag_index = defaultdict(deque) # type: DefaultDict[HashTag, Deque[Post]]
3538
following = defaultdict(set) # type: DefaultDict[User, Set[User]]
3639
followers = defaultdict(set) # type: DefaultDict[User, Set[User]]
3740
user_info = dict() # type: Dict[User, UserInfo]
3841

42+
# Matches a hashtag: '#' followed by one or more word characters.
hashtag_pattern = re.compile(r'#\w+')
43+
3944
def post_message(user: User, text: str, timestamp: Optional[Timestamp]=None) -> None:
    """Record a new post by *user* and index it for later lookup.

    The post is pushed onto the front of the global ``posts`` deque, the
    author's ``user_posts`` deque, and the ``hashtag_index`` deque of every
    distinct hashtag found in *text*, so the most recently added post is
    always first in each of them.

    Args:
        user: Name of the posting user; interned before being stored.
        text: Body of the message.
        timestamp: Time of the post.  When omitted (``None``) the current
            time is used.
    """
    user = intern(user)
    # Compare against None explicitly: a timestamp of 0 (the epoch) is falsy
    # and must not be silently replaced with the current time.
    if timestamp is None:
        timestamp = time()
    post = Post(timestamp, user, text)
    posts.appendleft(post)
    user_posts[user].appendleft(post)
    # De-duplicate so a post that repeats a hashtag is indexed only once and
    # cannot appear twice in the search results for that hashtag.
    for hashtag in set(hashtag_pattern.findall(text)):
        hashtag_index[hashtag].appendleft(post)
4552

4653
def follow(user: User, followed_user: User) -> None:
4754
user, followed_user = intern(user), intern(followed_user)
@@ -62,7 +69,8 @@ def get_followed(user: User) -> List[User]:
6269
return sorted(following[user])
6370

6471
def search(phrase: str, limit: Optional[int] = None) -> List[Post]:
    """Return up to *limit* posts matching *phrase*, most recently added first.

    When *phrase* is exactly one hashtag (for example ``'#python'``) the
    result is read straight from ``hashtag_index``.  Any other phrase falls
    back to a scan of ``posts`` for texts that contain *phrase* as a
    substring.

    Args:
        phrase: A hashtag or an arbitrary substring to look for.
        limit: Maximum number of posts to return; ``None`` means no limit.

    Returns:
        A list of the matching posts.
    """
    # fullmatch, not match: a phrase such as '#python rocks' merely starts
    # with a hashtag and has to go through the substring scan instead.
    if hashtag_pattern.fullmatch(phrase):
        # .get() avoids the defaultdict inserting an empty deque for every
        # unknown hashtag that somebody searches for.
        return list(islice(hashtag_index.get(phrase, ()), limit))
    return list(islice((post for post in posts if phrase in post.text), limit))
6775

6876
def hash_password(password: str, salt: Optional[bytes] = None) -> HashAndSalt:

pubsub/session.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
follow('raymondh', followed_user='barry')
3535

3636
if __name__ == '__main__':
37-
from pubsub import posts, followers, following, user_info
37+
from pubsub import posts, followers, following, user_info, hashtag_index
3838
from pubsub import posts_by_user, posts_for_user, search
3939
from pubsub import get_followers, get_followed, get_user, check_user, age
4040

@@ -57,6 +57,8 @@
5757
pprint(posts_for_user('davin', limit=6))
5858
print('\nSearch for #python (4)')
5959
pprint(search('#python', limit=4))
60+
print('\nSearch for python (4)')
61+
pprint(search('python', limit=4))
6062
print("\nRaymond's followers")
6163
pprint(get_followers('raymondh'))
6264
print("\nDavin follows")
@@ -65,3 +67,5 @@
6567
print(get_user('barry'))
6668
print("\nPost ages")
6769
pprint(list(map(age, posts)))
70+
print('\nHash tag index:')
71+
pprint(hashtag_index)

0 commit comments

Comments
 (0)