Skip to content

Commit 924ef9b

Browse files
beqakd and clauss authored
implementation of entropy algorithm. (#2110)
* implementation of entropy algorithm. * add tests, fix requested changes * open_file() --> analyze_text() * Create bidirectional_breadth_first_search.py * # type: ignore Co-authored-by: Christian Clauss <[email protected]>
1 parent a5c2467 commit 924ef9b

File tree

2 files changed

+133
-1
lines changed

2 files changed

+133
-1
lines changed

graphs/bidirectional_breadth_first_search.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,8 @@ def retrace_path(self, node: Node) -> List[Tuple[int]]:
100100

101101
class BidirectionalBreadthFirstSearch:
102102
"""
103-
>>> bd_bfs = BidirectionalBreadthFirstSearch((0, 0), (len(grid) - 1, len(grid[0]) - 1))
103+
>>> bd_bfs = BidirectionalBreadthFirstSearch((0, 0), (len(grid) - 1,
104+
... len(grid[0]) - 1))
104105
>>> bd_bfs.fwd_bfs.start.pos == bd_bfs.bwd_bfs.target.pos
105106
True
106107
>>> bd_bfs.retrace_bidirectional_path(bd_bfs.fwd_bfs.start,

maths/entropy.py

+131
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,131 @@
1+
#!/usr/bin/env python3
2+
3+
"""
4+
Implementation of entropy of information
5+
https://en.wikipedia.org/wiki/Entropy_(information_theory)
6+
"""
7+
8+
import math
9+
from collections import Counter
10+
from string import ascii_lowercase
11+
from typing import Tuple
12+
13+
14+
def calculate_prob(text: str) -> None:
    """
    Print three entropy statistics of ``text``.

    :param text: the sample text to analyze
    :return: None; prints (each rounded to the nearest bit):
        1) Entropy of information based on 1 alphabet
        2) Entropy of information based on couples of 2 alphabet
        3) Entropy of H(X n∣Xn−1)

    Text from random books. Also, random quotes.
    >>> text = ("Behind Winston’s back the voice "
    ...         "from the telescreen was still "
    ...         "babbling and the overfulfilment")
    >>> calculate_prob(text)
    4.0
    6.0
    2.0

    >>> text = ("The Ministry of Truth—Minitrue, in Newspeak [Newspeak was the official"
    ...         "face in elegant lettering, the three")
    >>> calculate_prob(text)
    4.0
    5.0
    1.0
    >>> text = ("Had repulsive dashwoods suspicion sincerity but advantage now him. "
    ...         "Remark easily garret nor nay. Civil those mrs enjoy shy fat merry. "
    ...         "You greatest jointure saw horrible. He private he on be imagine "
    ...         "suppose. Fertile beloved evident through no service elderly is. Blind "
    ...         "there if every no so at. Own neglected you preferred way sincerity "
    ...         "delivered his attempted. To of message cottage windows do besides "
    ...         "against uncivil. Delightful unreserved impossible few estimating "
    ...         "men favourable see entreaties. She propriety immediate was improving. "
    ...         "He or entrance humoured likewise moderate. Much nor game son say "
    ...         "feel. Fat make met can must form into gate. Me we offending prevailed "
    ...         "discovery.")
    >>> calculate_prob(text)
    4.0
    7.0
    3.0
    """
    single_char_strings, two_char_strings = analyze_text(text)
    my_alphas = list(" " + ascii_lowercase)
    # Total number of observed single characters (includes characters
    # outside my_alphas, which therefore lower the in-alphabet probabilities).
    all_sum = sum(single_char_strings.values())

    # First-order entropy H(X): sum over the lowercase alphabet plus space.
    my_fir_sum = 0.0
    for ch in my_alphas:
        if ch in single_char_strings:
            prob = single_char_strings[ch] / all_sum
            my_fir_sum += prob * math.log2(prob)  # entropy formula

    # Print H(X), rounded to the nearest whole bit, with one decimal place.
    print(f"{round(-1 * my_fir_sum):.1f}")

    # Second-order entropy H(X_{n-1}, X_n) over two-character sequences.
    all_sum = sum(two_char_strings.values())
    my_sec_sum = 0.0
    for ch0 in my_alphas:
        for ch1 in my_alphas:
            sequence = ch0 + ch1
            if sequence in two_char_strings:
                prob = two_char_strings[sequence] / all_sum
                my_sec_sum += prob * math.log2(prob)

    # Print the second-order entropy.
    print(f"{round(-1 * my_sec_sum):.1f}")

    # Conditional entropy H(X_n | X_{n-1}) = H(pairs) - H(singles).
    print(f"{round((-1 * my_sec_sum) - (-1 * my_fir_sum)):.1f}")
def analyze_text(text: str) -> Tuple[dict, dict]:
    """
    Convert text input into two dicts of counts.
    The first dictionary stores the frequency of single character strings.
    The second dictionary stores the frequency of two character strings.
    """
    # Every position in the text contributes exactly one single character.
    single_char_strings = Counter(text)  # type: ignore
    # Every adjacent pair of positions contributes one two-character string.
    two_char_strings = Counter(  # type: ignore
        text[pos : pos + 2] for pos in range(len(text) - 1)
    )
    # Treat the text as if preceded by a space, so the first character
    # also forms a leading bigram.
    two_char_strings[" " + text[0]] += 1
    return single_char_strings, two_char_strings
def main() -> None:
    """Run the module's doctests (the examples live in calculate_prob)."""
    import doctest

    doctest.testmod()


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)