Skip to content

Commit 880a1f1

Browse files
committed
Lesson 07 completed
1 parent b484826 commit 880a1f1

21 files changed

+10857
-1756
lines changed
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
Assignment 07 findings
2+
-------------------------
3+
1. I thoroughly enjoyed this lesson. I have dabbled in threading in some applications
4+
I have written, but never to this level. I really wanted to understand and make the
5+
assignment work with async programming, having never successfully implemented it.
6+
7+
2. asyncio was more difficult than using drop-in threading, but I feel confident to try
8+
it in the real world.
9+
10+
3. My time improvement with async was not as much as expected, but I am not surprised
11+
since the database modules are themselves well optimized and everything is
12+
performed locally on disk. I feel a ~30% improvement is respectable.
13+
14+
By adding more and more data, the performance gains are more apparent. When I tried
15+
running a lot more operations with many many coroutines executing concurrently, the
16+
performance got better, as expected. I expect to see further improvements with real
17+
world latency.
18+
19+
4. cProfile logs -> profile-{file}-{MMDDYYYY_HHMMSS}
20+
21+
- comparing all methods
22+
profile-linear_py-10042020_160228
23+
profile-parallel_py-10042020_160427
24+
25+
- comparing assignment requirements
26+
27+
- initial runs with ~30% improvement
28+
profile-linear_py-10042020_235308
29+
profile-parallel_py-10042020_235310
30+
31+
- more data -> more improvement over sequential operations
32+
profile-linear_py-10042020_235336
33+
profile-parallel_py-10042020_235339

students/tim_lurvey/lesson07/linear.py renamed to students/tim_lurvey/lesson07/assignment/linear.py

Lines changed: 51 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,9 @@
1212
from misc_utils import func_timer
1313

1414
# FILE_LOG_LEVEL = logging.NOTSET # 0
15-
FILE_LOG_LEVEL = logging.DEBUG # 10
15+
# FILE_LOG_LEVEL = logging.DEBUG # 10
1616
# FILE_LOG_LEVEL = logging.INFO # 20
17-
# FILE_LOG_LEVEL = logging.ERROR # 50
17+
FILE_LOG_LEVEL = logging.ERROR # 50
1818

1919
logging.basicConfig(format="%(asctime)s "
2020
"%(levelname)s "
@@ -28,6 +28,8 @@
2828
# database class initialization
2929
mongo = db.MongoDBConnection()
3030

31+
# variables for counting
32+
counts = {}
3133

3234
# @func_timer
3335
def document_to_dict(document: dict, key: str = "_id", suppress: tuple = ()) -> dict:
@@ -133,40 +135,51 @@ def parsed_file_data(filename: str, directory: str = "") -> tuple:
133135
raise error
134136

135137

136-
def import_data(path_name: str, product_file: str, customer_file: str, rentals_file: str)-> tuple:
137-
"""import data in to MongoDB from files"""
138-
138+
def import_data(path_name: str, files: tuple)-> tuple:
139+
"""import data in to MongoDB from files
140+
This function takes a directory name three csv files as input, one with
141+
product data, one with customer data and the third one with rentals data
142+
and creates and populates a new MongoDB database with these data.
143+
It returns 2 tuples: the first with a record count of the number of
144+
products, customers and rentals added (in that order), the second with
145+
a count of any errors that occurred, in the same order."""
139146
logger.info("Begin function import_data()")
140147

141-
input_records = []
142-
success_records = []
143-
144-
for file_name in [product_file, customer_file, rentals_file]:
148+
for file_name in files:
149+
#timer
150+
start_time = time.time()
145151
with mongo:
146152
# connect
147153
database = mongo.connection.norton
148154
# name from file
149155
name = file_name.replace(".csv", "")
156+
counts.update({name:{'old':0, 'new':0, 'errors':0}})
150157
# collection in database
151158
collection = database[name]
152159
logger.debug(f"New collection database.{name} created.")
160+
counts[name]['old'] = collection.count_documents({})
153161
# get data from file modified, modified for database input
154-
start_time = time.time()
155162
data = parsed_file_data(file_name, path_name)
156-
end_time = time.time() - start_time
157163
# inset the data
158164
result = collection.insert_many(data)
159165
# count the records
160-
n_rent = len(data)
161-
n_error = n_rent - len(result.inserted_ids)
162-
# store counts
163-
input_records.append(n_rent)
164-
success_records.append(n_error)
165-
logger.debug(f"Created database.{name} with {n_rent} records and {n_error} errors")
166-
logger.debug(f"Time in database.{name} was {end_time} seconds")
166+
counts[name]['new'] = len(list(collection.find())) - \
167+
counts[name]['old']
168+
counts[name]['errors'] = counts[name]['new'] -\
169+
len(result.inserted_ids)
170+
counts[name]['time'] = time.time() - start_time
171+
# info
172+
logger.debug(f"Time in database.{name} was {counts[name]['time']} seconds")
173+
logger.info(f"Created database.{name} "
174+
f"with {counts[name]['new']} records "
175+
f"and {counts[name]['errors']} errors")
167176

168177
logger.info("End function import_data()")
169-
return (tuple(input_records), tuple(success_records))
178+
answer = [[],[]]
179+
for db in ['products', 'customers', 'rentals']:
180+
answer[0].append(counts[db]['new'])
181+
answer[1].append(counts[db]['errors'])
182+
return tuple(answer[0]), tuple(answer[1])
170183

171184

172185
@func_timer
@@ -198,6 +211,7 @@ def delete_all_collections(exclude: tuple = ()):
198211
def main():
199212
"""main function to populate all data into the database"""
200213
logger.info("begin function main()")
214+
#
201215
pathx = "\\".join(["C:",
202216
"Users",
203217
"pants",
@@ -206,20 +220,29 @@ def main():
206220
"students",
207221
"tim_lurvey",
208222
"lesson07",
223+
"assignment",
209224
"data"])
210225

211-
count, errors = import_data(path_name=pathx,
212-
product_file='products.csv',
213-
customer_file='customers.csv',
214-
rentals_file='rentals.csv')
226+
data_files = ('products.csv', 'customers.csv', 'rentals.csv')
227+
count, errors = import_data(path_name=pathx, files=data_files)
215228

216229
logger.debug(f"Populated all data {count} with {errors} errors")
217230
logger.info("end function main()")
218231

219232
if __name__ == "__main__":
220-
delete_all_collections()
233+
# reset database
234+
# delete_all_collections()
235+
# populate the database
221236
main()
222-
all_products = show_available_products()
223-
for pid in all_products:
224-
rentals = show_rentals(product_id=pid)
225-
logger.info(f"Found {len(rentals)} rental records for {pid}")
237+
#
238+
# all_products = show_available_products()
239+
# for pid in all_products:
240+
# rentals = show_rentals(product_id=pid)
241+
# logger.info(f"Found {len(rentals)} rental records for {pid}")
242+
for db in ('customers', 'rentals'):
243+
print(db, (counts.get(db).get('new'),
244+
counts.get(db).get('old'),
245+
counts.get(db).get('new') + counts.get(db).get('old'),
246+
counts.get(db).get('time'),
247+
)
248+
)

0 commit comments

Comments
 (0)