From 2759077c1f5690bfc45c6d2bdac9e2d101dcc020 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 3 May 2020 01:06:21 -0700 Subject: [PATCH 1/4] Use integer math for combinations() --- resampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resampling.py b/resampling.py index 2c08f42..933db2a 100644 --- a/resampling.py +++ b/resampling.py @@ -3,7 +3,7 @@ from statistics import mean def combinations(n, r): - return fact(n) / fact(r) / fact(n-r) + return fact(n) // (fact(r) * fact(n-r)) def cum_binom(n, r, ph): total = 0.0 From 95514416e047eabe93ebaa94971576df737c184c Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sun, 3 May 2020 01:10:53 -0700 Subject: [PATCH 2/4] Use the class form of NamedTuple --- congress.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/congress.py b/congress.py index ea998bd..888b200 100644 --- a/congress.py +++ b/congress.py @@ -11,9 +11,13 @@ import csv import glob -Senator = NamedTuple('Senator', [('name', str), ('party', str), ('state', str)]) VoteValue = float +class Senator(NamedTuple): + name: str + party: str + state: str + # Load votes arranged by topic and accumulate votes by senator vote_value = {'Nay': -1, 'Not Voting': 0, 'Yea': 1} # type: Dict[str, VoteValue] accumulated_record = defaultdict(list) # type: DefaultDict[Senator, List[VoteValue]] From e24a8e924a88eb2a2d336f9894ed122845e1e751 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Wed, 20 May 2020 22:11:21 -0700 Subject: [PATCH 3/4] Minor beautifications --- pubsub/pubsub.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pubsub/pubsub.py b/pubsub/pubsub.py index 50e784d..5aa3830 100644 --- a/pubsub/pubsub.py +++ b/pubsub/pubsub.py @@ -74,9 +74,10 @@ def search(phrase: str, limit: Optional[int] = None) -> List[Post]: def hash_password(password: str, salt: Optional[bytes] = None) -> HashAndSalt: pepper = b'alchemists discovered that gold came from earth air fire and water' salt = salt or secrets.token_bytes(16) - return hashlib.pbkdf2_hmac('sha512', password.encode(), salt+pepper, 100000), salt + return hashlib.pbkdf2_hmac('sha512', password.encode(), salt+pepper, 100_000), salt -def set_user(user: User, displayname: str, email: str, password: str, bio: Optional[str]=None, photo: Optional[str]=None) -> None: +def set_user(user: User, displayname: str, email: str, password: str, + bio: Optional[str]=None, photo: Optional[str]=None) -> None: user = intern(user) hashed_password = hash_password(password) user_info[user] = UserInfo(displayname, email, hashed_password, bio, photo) From d305a097607acce12fb687a3a24da993fdfb6353 Mon Sep 17 00:00:00 2001 From: Raymond Hettinger Date: Sat, 30 May 2020 15:30:37 -0700 Subject: [PATCH 4/4] Add queue simulations --- resampling.py | 70 ++++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 64 insertions(+), 6 deletions(-) diff --git a/resampling.py b/resampling.py index 933db2a..fb09154 100644 --- a/resampling.py +++ b/resampling.py @@ -1,6 +1,10 @@ +from random import random, randrange, shuffle, expovariate, gauss +from statistics import mean, median, stdev from math import factorial as fact -from random import random, randrange, shuffle -from statistics import mean +from heapq import heappush, heappop + +############################################################################# +# Helper functions for analysis def combinations(n, r): return fact(n) // (fact(r) * fact(n-r)) @@ -11,9 +15,8 @@ def cum_binom(n, r, ph): total += ph ** h * (1.0 - ph) ** (n - h) * combinations(n, h) return total -# Estimate the probability of getting 42500 or more heads -# from 70000 spins of biased coin (60% heads and 40% tails) +############################################################################# # Estimate the probability of getting 5 or more heads # from 7 spins of biased coin (60% heads and 40% tails) print(cum_binom(7, 5, 0.60)) @@ -32,13 +35,16 @@ def trial(): trial = lambda : sum(random() < 0.60 for i in range(7)) >= 5 print(sum(trial() for i in range(10000)) / 10000.0) + +############################################################################# # Probability of the median of 5 samples being in the # middle two quartiles of the population trial = lambda : 2500 <= sorted(randrange(10000) for i in range(5))[2] < 7500 print(sum(trial() for i in range(10000)) / 10000.0) # compare with the beta distribution -# An exact permutation text -# Hypothesis testing + +############################################################################# +# Hypothesis testing with an exact permutation text drug = [54, 73, 53, 70, 73, 68, 52, 65, 65] placebo = [54, 51, 58, 44, 55, 52, 42, 47, 58, 46] obs_diff = mean(drug) - mean(placebo) @@ -57,3 +63,55 @@ def trial(): print(cases / 10000, '<-- The p-value') print('Accordlingly, we reject the null hypothesis') print('and conclude the observed difference was not due to chance') + + +############################################################################# +# Simulation of arrival times and service deliveries in a single server queue + +average_arrival_interval = 5.6 +average_service_time = 5.0 +stdev_service_time = 0.5 + +num_waiting = 0 +arrivals = [] +starts = [] +arrival = service_end = 0.0 +for i in range(20000): + if arrival <= service_end: + num_waiting += 1 + arrival += expovariate(1.0 / average_arrival_interval) + arrivals.append(arrival) + else: + num_waiting -= 1 + service_start = service_end if num_waiting else arrival + service_time = gauss(average_service_time, stdev_service_time) + service_end = service_start + service_time + starts.append(service_start) + +waits = [start - arrival for arrival, start in zip(arrivals, starts)] +print(f'Mean wait: {mean(waits):.1f}. Stdev wait: {stdev(waits):.1f}.') +print(f'Median wait: {median(waits):.1f}. Max wait: {max(waits):.1f}.') + + +############################################################################ +# Simulation of arrival times and service deliveries for a multiserver queue + +average_arrival_interval = 5.6 +average_service_time = 15.0 +stdev_service_time = 3.5 +num_servers = 3 + +waits = [] +arrival_time = 0.0 +servers = [0.0] * num_servers # time when each server becomes available +for i in range(100_000): + arrival_time += expovariate(1.0 / average_arrival_interval) + next_server_available = heappop(servers) + wait = max(0.0, next_server_available - arrival_time) + waits.append(wait) + service_duration = gauss(average_service_time, stdev_service_time) + service_completed = arrival_time + wait + service_duration + heappush(servers, service_completed) + +print(f'Mean wait: {mean(waits):.1f}. Stdev wait: {stdev(waits):.1f}.') +print(f'Median wait: {median(waits):.1f}. Max wait: {max(waits):.1f}.')