From 27b8c3a573c1e2e4d77aa597bca5ed79e24bc94b Mon Sep 17 00:00:00 2001 From: James Sutton <1892175+zeryx@users.noreply.github.com> Date: Fri, 24 Jun 2022 08:27:29 -0700 Subject: [PATCH 1/7] Update mlops.py --- adk/mlops.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/adk/mlops.py b/adk/mlops.py index a64efbd..b847a18 100644 --- a/adk/mlops.py +++ b/adk/mlops.py @@ -8,6 +8,7 @@ class MLOps(object): spool_dir = "/tmp/ta" agent_dir = "/opt/mlops-agent" mlops_dir_name = "datarobot_mlops_package-8.1.2" + total_dir_path = agent_dir + "/" + mlops_dir_name def __init__(self, api_token, path): self.token = api_token @@ -29,18 +30,18 @@ def init(self): os.environ['MLOPS_SPOOLER_TYPE'] = "FILESYSTEM" os.environ['MLOPS_FILESYSTEM_DIRECTORY'] = self.spool_dir - with open(f'{self.agent_dir}/{self.mlops_dir_name}/conf/mlops.agent.conf.yaml') as f: + with open(total_dir_path + '/conf/mlops.agent.conf.yaml') as f: documents = yaml.load(f, Loader=yaml.FullLoader) documents['mlopsUrl'] = self.endpoint documents['apiToken'] = self.token - with open(f'{self.agent_dir}/{self.mlops_dir_name}/conf/mlops.agent.conf.yaml', 'w') as f: + with open(total_dir_path + '/conf/mlops.agent.conf.yaml', 'w') as f: yaml.dump(documents, f) - subprocess.call(f'{self.agent_dir}/{self.mlops_dir_name}/bin/start-agent.sh') - check = subprocess.Popen([f'{self.agent_dir}/{self.mlops_dir_name}/bin/status-agent.sh'], stdout=subprocess.PIPE) + subprocess.call(total_dir_path + '/bin/start-agent.sh') + check = subprocess.Popen([total_dir_path + '/bin/status-agent.sh'], stdout=subprocess.PIPE) output = check.stdout.readlines()[0] check.terminate() if b"DataRobot MLOps-Agent is running as a service." in output: return True else: - raise Exception(output) \ No newline at end of file + raise Exception(output) From 201ee6db059983261298cb5301a118023fc6baa2 Mon Sep 17 00:00:00 2001 From: zeryx <1892175+zeryx@users.noreply.github.com> Date: Mon, 4 Jul 2022 16:15:40 -0300 Subject: [PATCH 2/7] replaced python with json highlighting --- README.md | 9 +++------ README_template.md | 9 +++------ 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 21381ec..5c86caf 100644 --- a/README.md +++ b/README.md @@ -215,7 +215,7 @@ algorithm.init({"data": "https://i.imgur.com/bXdORXl.jpeg"}) Model Manifests are optional files that you can provide to your algorithm to easily define important model files, their locations; and metadata - this file is called `model_manifest.json`. -```python +```json { "required_files" : [ { "name": "squeezenet", @@ -249,7 +249,7 @@ define important model files, their locations; and metadata - this file is calle With the Model Manifest system, you're also able to "freeze" your model_manifest.json, creating a model_manifest.json.freeze. This file encodes the hash of the model file, preventing tampering once frozen - forver locking a version of your algorithm code with your model file. -```python +```json { "required_files":[ { @@ -298,7 +298,7 @@ For this, you'll need to select an MLOps Enabled Environment; and you will need Once setup, you will need to define your `mlops.json` file, including your deployment and model ids. -```python +```json { "model_id": "YOUR_MODEL_ID", "deployment_id": "YOUR_DEPLOYMENT_ID", @@ -345,9 +345,6 @@ algorithm.init(0.25, mlops=True) ``` -report_deployment_stats() - - ## Readme publishing diff --git a/README_template.md b/README_template.md index ded0700..3c47c6a 100644 --- a/README_template.md +++ b/README_template.md @@ -61,12 +61,12 @@ Check out these examples to help you get started: Model Manifests are optional files that you can provide to your algorithm to easily define important model files, their locations; and metadata - this file is called `model_manifest.json`. -```python +```json ``` With the Model Manifest system, you're also able to "freeze" your model_manifest.json, creating a model_manifest.json.freeze. This file encodes the hash of the model file, preventing tampering once frozen - forver locking a version of your algorithm code with your model file. -```python +```json ``` As you can link to both hosted data collections, and AWS/GCP/Azure based block storage media, you're able to link your algorithm code with your model files, wherever they live today. @@ -81,7 +81,7 @@ For this, you'll need to select an MLOps Enabled Environment; and you will need Once setup, you will need to define your `mlops.json` file, including your deployment and model ids. -```python +```json ``` Along with defining your `DATAROBOT_MLOPS_API_TOKEN` as a secret to your Algorithm, you're ready to start sending MLOps data back to DataRobot! @@ -90,9 +90,6 @@ Along with defining your `DATAROBOT_MLOPS_API_TOKEN` as a secret to your Algorit ```python ``` -report_deployment_stats() - - ## Readme publishing From c0f47d1048f761551e49b9c518fca39cef888dbf Mon Sep 17 00:00:00 2001 From: James Sutton <1892175+zeryx@users.noreply.github.com> Date: Thu, 14 Jul 2022 10:14:59 -0700 Subject: [PATCH 3/7] added self ref to total_dir_path --- adk/mlops.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/adk/mlops.py b/adk/mlops.py index b847a18..cbde73a 100644 --- a/adk/mlops.py +++ b/adk/mlops.py @@ -30,15 +30,15 @@ def init(self): os.environ['MLOPS_SPOOLER_TYPE'] = "FILESYSTEM" os.environ['MLOPS_FILESYSTEM_DIRECTORY'] = self.spool_dir - with open(total_dir_path + '/conf/mlops.agent.conf.yaml') as f: + with open(self.total_dir_path + '/conf/mlops.agent.conf.yaml') as f: documents = yaml.load(f, Loader=yaml.FullLoader) documents['mlopsUrl'] = self.endpoint documents['apiToken'] = self.token - with open(total_dir_path + '/conf/mlops.agent.conf.yaml', 'w') as f: + with open(self.total_dir_path + '/conf/mlops.agent.conf.yaml', 'w') as f: yaml.dump(documents, f) - subprocess.call(total_dir_path + '/bin/start-agent.sh') - check = subprocess.Popen([total_dir_path + '/bin/status-agent.sh'], stdout=subprocess.PIPE) + subprocess.call(self.total_dir_path + '/bin/start-agent.sh') + check = subprocess.Popen([self.total_dir_path + '/bin/status-agent.sh'], stdout=subprocess.PIPE) output = check.stdout.readlines()[0] check.terminate() if b"DataRobot MLOps-Agent is running as a service." in output: From 350a4eaf05dc5ebfcb806d05c6bbc0b3055982c0 Mon Sep 17 00:00:00 2001 From: zeryx <1892175+zeryx@users.noreply.github.com> Date: Fri, 22 Jul 2022 11:29:31 -0300 Subject: [PATCH 4/7] enable safe replacement of mlops.json by setting environment variables --- adk/mlops.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/adk/mlops.py b/adk/mlops.py index cbde73a..34d44e9 100644 --- a/adk/mlops.py +++ b/adk/mlops.py @@ -15,14 +15,18 @@ def __init__(self, api_token, path): if os.path.exists(path): with open(path) as f: mlops_config = json.load(f) - else: - raise Exception("'mlops.json' file does not exist, but mlops was requested.") + self.endpoint = mlops_config['datarobot_mlops_service_url'] + self.model_id = mlops_config['model_id'] + self.deployment_id = mlops_config['deployment_id'] + self.mlops_name = mlops_config.get('mlops_dir_name', 'datarobot_mlops_package-8.1.2') + if "MLOPS_SERVICE_URL" in os.environ: + self.endpoint = os.environ['MLOPS_SERVICE_URL'] + if "MODEL_ID" in os.environ: + self.model_id = os.environ['MODEL_ID'] + if "DEPLOYMENT_ID" in os.environ: + self.deployment_id = os.environ['DEPLOYMENT_ID'] if not os.path.exists(self.agent_dir): raise Exception("environment is not configured for mlops.\nPlease select a valid mlops enabled environment.") - self.endpoint = mlops_config['datarobot_mlops_service_url'] - self.model_id = mlops_config['model_id'] - self.deployment_id = mlops_config['deployment_id'] - self.mlops_name = mlops_config.get('mlops_dir_name', 'datarobot_mlops_package-8.1.2') def init(self): os.environ['MLOPS_DEPLOYMENT_ID'] = self.deployment_id From 7b145070503de84dedf09a6416c8d0c6b6ff16d9 Mon Sep 17 00:00:00 2001 From: zeryx <1892175+zeryx@users.noreply.github.com> Date: Tue, 26 Jul 2022 05:47:32 -0300 Subject: [PATCH 5/7] added better error checking --- adk/mlops.py | 10 +++++++++ adk/modeldata.py | 55 ++++++++++++++++++++++++++---------------------- 2 files changed, 40 insertions(+), 25 deletions(-) diff --git a/adk/mlops.py b/adk/mlops.py index 34d44e9..cd335a7 100644 --- a/adk/mlops.py +++ b/adk/mlops.py @@ -28,6 +28,16 @@ def __init__(self, api_token, path): if not os.path.exists(self.agent_dir): raise Exception("environment is not configured for mlops.\nPlease select a valid mlops enabled environment.") + if self.endpoint is None: + raise Exception("'no endpoint found, please add 'MLOPS_SERVICE_URL' environment variable, or create an " + "mlops.json file") + if self.model_id is None: + raise Exception("no model_id found, please add 'MODEL_ID' environment variable, or create an mlops.json " + "file") + if self.deployment_id is None: + raise Exception("no deployment_id found, please add 'DEPLOYMENT_ID' environment variable, or create an " + "mlops.json file") + def init(self): os.environ['MLOPS_DEPLOYMENT_ID'] = self.deployment_id os.environ['MLOPS_MODEL_ID'] = self.model_id diff --git a/adk/modeldata.py b/adk/modeldata.py index e2b49cd..893d28b 100644 --- a/adk/modeldata.py +++ b/adk/modeldata.py @@ -56,37 +56,42 @@ def initialize(self): self.models[name] = FileData(real_hash, local_data_path) def get_model(self, model_name): - if model_name in self.models: - return self.models[model_name].file_path - elif len([optional for optional in self.manifest_data['optional_files'] if - optional['name'] == model_name]) > 0: - self.find_optional_model(model_name) - return self.models[model_name].file_path + if self.available(): + if model_name in self.models: + return self.models[model_name].file_path + elif len([optional for optional in self.manifest_data['optional_files'] if + optional['name'] == model_name]) > 0: + self.find_optional_model(model_name) + return self.models[model_name].file_path + else: + raise Exception("model name " + model_name + " not found in manifest") else: - raise Exception("model name " + model_name + " not found in manifest") + raise Exception("unable to get model {}, model_manifest.json not found.".format(model_name)) def find_optional_model(self, file_name): - - found_models = [optional for optional in self.manifest_data['optional_files'] if - optional['name'] == file_name] - if len(found_models) == 0: - raise Exception("file with name '" + file_name + "' not found in model manifest.") - model_info = found_models[0] - self.models[file_name] = {} - source_uri = model_info['source_uri'] - fail_on_tamper = model_info.get("fail_on_tamper", False) - expected_hash = model_info.get('md5_checksum', None) - with self.client.file(source_uri).getFile() as f: - local_data_path = f.name - real_hash = md5_for_file(local_data_path) - if self.using_frozen: - if real_hash != expected_hash and fail_on_tamper: - raise Exception("Model File Mismatch for " + file_name + - "\nexpected hash: " + expected_hash + "\nreal hash: " + real_hash) + if self.available(): + found_models = [optional for optional in self.manifest_data['optional_files'] if + optional['name'] == file_name] + if len(found_models) == 0: + raise Exception("file with name '" + file_name + "' not found in model manifest.") + model_info = found_models[0] + self.models[file_name] = {} + source_uri = model_info['source_uri'] + fail_on_tamper = model_info.get("fail_on_tamper", False) + expected_hash = model_info.get('md5_checksum', None) + with self.client.file(source_uri).getFile() as f: + local_data_path = f.name + real_hash = md5_for_file(local_data_path) + if self.using_frozen: + if real_hash != expected_hash and fail_on_tamper: + raise Exception("Model File Mismatch for " + file_name + + "\nexpected hash: " + expected_hash + "\nreal hash: " + real_hash) + else: + self.models[file_name] = FileData(real_hash, local_data_path) else: self.models[file_name] = FileData(real_hash, local_data_path) else: - self.models[file_name] = FileData(real_hash, local_data_path) + raise Exception("unable to get model {}, model_manifest.json not found.".format(model_name)) def get_manifest(self): if os.path.exists(self.manifest_frozen_path): From cb18c18cdeddde66ae72d8f5bcd542460fdb3d6a Mon Sep 17 00:00:00 2001 From: zeryx <1892175+zeryx@users.noreply.github.com> Date: Tue, 26 Jul 2022 06:12:01 -0300 Subject: [PATCH 6/7] added better stack trace processing for both load time and runtime failures --- adk/io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adk/io.py b/adk/io.py index be2045c..ccaabfc 100644 --- a/adk/io.py +++ b/adk/io.py @@ -57,7 +57,7 @@ def create_exception(exception, loading_exception=False): response = json.dumps({ "error": { "message": str(exception), - "stacktrace": traceback.format_exc(), + "stacktrace": " ".join(traceback.format_exception(etype=type(exception), value=exception, tb=exception.__traceback__)), "error_type": error_type, } }) From 4c6b718d738f4ec6035e53712d96eaf8da3aca08 Mon Sep 17 00:00:00 2001 From: zeryx <1892175+zeryx@users.noreply.github.com> Date: Tue, 26 Jul 2022 06:37:59 -0300 Subject: [PATCH 7/7] updated test case as we're now actually using stacktraces! --- tests/test_adk_local.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_adk_local.py b/tests/test_adk_local.py index 4b5585c..d4ed78a 100644 --- a/tests/test_adk_local.py +++ b/tests/test_adk_local.py @@ -135,17 +135,18 @@ def test_manifest_file_success(self): self.assertEqual(expected_output, actual_output) def test_manifest_file_tampered(self): - input = "Algorithmia" + input = 'Algorithmia' expected_output = {"error": {"error_type": "LoadingError", "message": "Model File Mismatch for squeezenet\n" "expected hash: f20b50b44fdef367a225d41f747a0963\n" "real hash: 46a44d32d2c5c07f7f66324bef4c7266", - "stacktrace": "NoneType: None\n"}} + "stacktrace": ''}} actual_output = json.loads(self.execute_manifest_example(input, apply_successful_manifest_parsing, loading_with_manifest, manifest_path="tests/manifests/bad_model_manifest" ".json")) + actual_output['error']['stacktrace'] = '' self.assertEqual(expected_output, actual_output)