#!/usr/bin/env python3
2+ 
3+ """ 
4+ Write a function that returns a list of all the duplicate files.  
5+ 
6+ the first item is the duplicate file 
7+ the second item is the original file 
8+ For example: 
9+ 
10+   [('/tmp/parker_is_dumb.mpg', '/home/parker/secret_puppy_dance.mpg'), 
11+  ('/home/trololol.mov', '/etc/apache2/httpd.conf')] 
12+ You can assume each file was only duplicated once. 
13+ """ 
14+ 
15+ import  os 
16+ import  hashlib 
17+ 
def find_duplicate_files(starting_directory):
    """Walk starting_directory and return (duplicate, original) pairs.

    Two files are considered copies of each other when their sampled
    hashes match; of each matching pair, the file with the later
    modification time is treated as the duplicate.
    """
    duplicates = []
    hash_to_file = {}  # sample hash -> (mtime, path) of the presumed original
    paths_to_visit = [starting_directory]

    while paths_to_visit:
        path = paths_to_visit.pop()

        # Directories are expanded onto the stack; only plain files get hashed.
        if os.path.isdir(path):
            paths_to_visit.extend(
                os.path.join(path, entry) for entry in os.listdir(path)
            )
            continue

        digest = sample_hash_file(path)
        mtime = os.path.getmtime(path)

        previous = hash_to_file.get(digest)
        if previous is None:
            # First time we see this content: remember it as the original.
            hash_to_file[digest] = (mtime, path)
        else:
            previous_mtime, previous_path = previous
            if mtime > previous_mtime:
                # Current file was edited later, so it is the copy.
                duplicates.append((path, previous_path))
            else:
                # Current file is older: it becomes the original and the
                # previously-seen file is the copy.
                duplicates.append((previous_path, path))
                hash_to_file[digest] = (mtime, path)

    return duplicates
51+ 
52+ 
def sample_hash_file(path):
    """Return a SHA-512 hex digest built from samples of the file at *path*.

    Files smaller than three samples' worth of bytes are hashed in full;
    larger files are hashed from three evenly spaced 4000-byte samples
    (start, middle, end) so huge files need not be read entirely.
    """
    num_bytes_to_read_per_sample = 4000
    total_bytes = os.path.getsize(path)
    hasher = hashlib.sha512()

    with open(path, 'rb') as file:

        if total_bytes < num_bytes_to_read_per_sample * 3:
            # Small file: hash the whole contents in one go.
            hasher.update(file.read())
        else:
            # Gap between consecutive samples. Use floor division so the
            # seek offset stays an int — true division yields a float,
            # which file.seek() rejects on Python 3.
            num_bytes_between_samples = (
                (total_bytes - num_bytes_to_read_per_sample * 3) // 2
            )

            for offset_multiplier in range(3):
                start_of_sample = (
                    offset_multiplier
                    * (num_bytes_to_read_per_sample + num_bytes_between_samples)
                )
                file.seek(start_of_sample)
                sample = file.read(num_bytes_to_read_per_sample)
                hasher.update(sample)

    return hasher.hexdigest()