From 08935403d2da1d78c9186115562b859903ae6d5c Mon Sep 17 00:00:00 2001 From: James Sutton <1892175+zeryx@users.noreply.github.com> Date: Thu, 18 Nov 2021 13:27:49 -0500 Subject: [PATCH 01/20] added support for frozen manifest failover + tests --- adk/ADK.py | 2 +- adk/modeldata.py | 48 ++++++++++++++++++++++++++-------------- tests/test_adk_local.py | 6 ++--- tests/test_adk_remote.py | 2 +- 4 files changed, 37 insertions(+), 21 deletions(-) diff --git a/adk/ADK.py b/adk/ADK.py index 9107bff..51a73f0 100644 --- a/adk/ADK.py +++ b/adk/ADK.py @@ -38,7 +38,7 @@ def __init__(self, apply_func, load_func=None, client=None): self.is_local = not os.path.exists(self.FIFO_PATH) self.load_result = None self.loading_exception = None - self.manifest_path = "model_manifest.json.freeze" + self.manifest_path = "model_manifest.json" self.model_data = self.init_manifest(self.manifest_path) def init_manifest(self, path): diff --git a/adk/modeldata.py b/adk/modeldata.py index 39dadf9..529d8ff 100644 --- a/adk/modeldata.py +++ b/adk/modeldata.py @@ -6,8 +6,9 @@ class ModelData(object): def __init__(self, client, model_manifest_path): - self.manifest_freeze_path = model_manifest_path - self.manifest_data = get_manifest(self.manifest_freeze_path) + self.manifest_path = model_manifest_path + self.manifest_freeze_path = "{}.freeze".format(self.manifest_path) + self.manifest_data = get_manifest(self.manifest_freeze_path, self.manifest_path) self.client = client self.models = {} self.usr_key = "__user__" @@ -27,7 +28,6 @@ def data(self): output[without_usr_key] = __dict[key] return output - def available(self): if self.manifest_data: return True @@ -39,14 +39,16 @@ def initialize(self): raise Exception("Client was not defined, please define a Client when using Model Manifests.") for required_file in self.manifest_data['required_files']: name = required_file['name'] + source_uri = required_file['source_uri'] + fail_on_tamper = required_file.get('fail_on_tamper', False) + expected_hash = 
required_file.get('md5_checksum', None) if name in self.models: raise Exception("Duplicate 'name' detected. \n" + name + " was found to be used by more than one data file, please rename.") - expected_hash = required_file['md5_checksum'] - with self.client.file(required_file['source_uri']).getFile() as f: + with self.client.file(source_uri).getFile() as f: local_data_path = f.name real_hash = md5_for_file(local_data_path) - if real_hash != expected_hash and required_file['fail_on_tamper']: + if real_hash != expected_hash and fail_on_tamper: raise Exception("Model File Mismatch for " + name + "\nexpected hash: " + expected_hash + "\nreal hash: " + real_hash) else: @@ -70,32 +72,46 @@ def find_optional_model(self, file_name): raise Exception("file with name '" + file_name + "' not found in model manifest.") model_info = found_models[0] self.models[file_name] = {} - expected_hash = model_info['md5_checksum'] - with self.client.file(model_info['source_uri']).getFile() as f: + source_uri = model_info['source_uri'] + fail_on_tamper = model_info.get("fail_on_tamper", False) + expected_hash = model_info.get('md5_checksum', None) + with self.client.file(source_uri).getFile() as f: local_data_path = f.name real_hash = md5_for_file(local_data_path) - if real_hash != expected_hash and model_info['fail_on_tamper']: + if real_hash != expected_hash and fail_on_tamper: raise Exception("Model File Mismatch for " + file_name + "\nexpected hash: " + expected_hash + "\nreal hash: " + real_hash) else: self.models[file_name] = FileData(real_hash, local_data_path) -def get_manifest(manifest_path): - if os.path.exists(manifest_path): - with open(manifest_path) as f: +def get_manifest(manifest_frozen_path, manifest_reg_path): + if os.path.exists(manifest_frozen_path): + with open(manifest_frozen_path) as f: manifest_data = json.load(f) - expected_lock_checksum = manifest_data.get('lock_checksum') - del manifest_data['lock_checksum'] - detected_lock_checksum = md5_for_str(str(manifest_data)) 
- if expected_lock_checksum != detected_lock_checksum: + if check_lock(manifest_data): + return manifest_data + else: raise Exception("Manifest FreezeFile Tamper Detected; please use the CLI and 'algo freeze' to rebuild your " "algorithm's freeze file.") + elif os.path.exists(manifest_reg_path): + with open(manifest_reg_path) as f: + manifest_data = json.load(f) return manifest_data else: return None +def check_lock(manifest_data): + expected_lock_checksum = manifest_data.get('lock_checksum') + del manifest_data['lock_checksum'] + detected_lock_checksum = md5_for_str(str(manifest_data)) + if expected_lock_checksum != detected_lock_checksum: + return False + else: + return True + + def md5_for_file(fname): hash_md5 = hashlib.md5() with open(fname, "rb") as f: diff --git a/tests/test_adk_local.py b/tests/test_adk_local.py index 5592bfc..4b5585c 100644 --- a/tests/test_adk_local.py +++ b/tests/test_adk_local.py @@ -23,7 +23,7 @@ def execute_example(self, input, apply, load=None): algo.init(input, pprint=lambda x: output.append(x)) return output[0] - def execute_manifest_example(self, input, apply, load, manifest_path="manifests/good_model_manifest.json.freeze"): + def execute_manifest_example(self, input, apply, load, manifest_path): client = Algorithmia.client() algo = ADKTest(apply, load, manifest_path=manifest_path, client=client) output = [] @@ -131,7 +131,7 @@ def test_manifest_file_success(self): actual_output = json.loads(self.execute_manifest_example(input, apply_successful_manifest_parsing, loading_with_manifest, manifest_path="tests/manifests/good_model_manifest" - ".json.freeze")) + ".json")) self.assertEqual(expected_output, actual_output) def test_manifest_file_tampered(self): @@ -145,7 +145,7 @@ def test_manifest_file_tampered(self): actual_output = json.loads(self.execute_manifest_example(input, apply_successful_manifest_parsing, loading_with_manifest, manifest_path="tests/manifests/bad_model_manifest" - ".json.freeze")) + ".json")) 
self.assertEqual(expected_output, actual_output) diff --git a/tests/test_adk_remote.py b/tests/test_adk_remote.py index f0d69ba..5108dd7 100644 --- a/tests/test_adk_remote.py +++ b/tests/test_adk_remote.py @@ -173,7 +173,7 @@ def test_manifest_file_success(self): actual_output = self.execute_manifest_example(input, apply_successful_manifest_parsing, loading_with_manifest, manifest_path="tests/manifests/good_model_manifest" - ".json.freeze") + ".json") self.assertEqual(expected_output, actual_output) From 59c9a7b53eda10d367e6352b9801b42a41a4ce54 Mon Sep 17 00:00:00 2001 From: James Sutton <1892175+zeryx@users.noreply.github.com> Date: Wed, 1 Dec 2021 11:15:50 -0400 Subject: [PATCH 02/20] added default to load_func check in the event that modeldata is not used. --- adk/ADK.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/adk/ADK.py b/adk/ADK.py index 51a73f0..913ca19 100644 --- a/adk/ADK.py +++ b/adk/ADK.py @@ -29,8 +29,8 @@ def __init__(self, apply_func, load_func=None, client=None): if load_func: load_args, _, _, _, _, _, _ = inspect.getfullargspec(load_func) self.load_arity = len(load_args) - if self.load_arity != 1: - raise Exception("load function expects 1 parameter to be used to store algorithm state") + if self.load_arity not in (0, 1): + raise Exception("load function expects 0 parameters or 1 parameter to be used to store algorithm state") self.load_func = load_func else: self.load_func = None @@ -48,8 +48,10 @@ def load(self): try: if self.model_data.available(): self.model_data.initialize() - if self.load_func: + if self.load_func and self.load_arity == 1: self.load_result = self.load_func(self.model_data) + elif self.load_func: + self.load_result = self.load_func() except Exception as e: self.loading_exception = e finally: From 0820e4412c775195480795bfd2485b6f50330da1 Mon Sep 17 00:00:00 2001 From: James Sutton <1892175+zeryx@users.noreply.github.com> Date: Mon, 28 Feb 2022 14:56:08 -0400 Subject: [PATCH 03/20] ignore tamper 
settings if we're using a model manifest fallback --- adk/modeldata.py | 43 ++++++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/adk/modeldata.py b/adk/modeldata.py index 529d8ff..0b6acab 100644 --- a/adk/modeldata.py +++ b/adk/modeldata.py @@ -6,12 +6,13 @@ class ModelData(object): def __init__(self, client, model_manifest_path): - self.manifest_path = model_manifest_path - self.manifest_freeze_path = "{}.freeze".format(self.manifest_path) - self.manifest_data = get_manifest(self.manifest_freeze_path, self.manifest_path) + self.manifest_reg_path = model_manifest_path + self.manifest_frozen_path = "{}.freeze".format(self.manifest_reg_path) + self.manifest_data = self.get_manifest() self.client = client self.models = {} self.usr_key = "__user__" + self.using_frozen = True def __getitem__(self, key): return getattr(self, self.usr_key + key) @@ -78,28 +79,32 @@ def find_optional_model(self, file_name): with self.client.file(source_uri).getFile() as f: local_data_path = f.name real_hash = md5_for_file(local_data_path) - if real_hash != expected_hash and fail_on_tamper: - raise Exception("Model File Mismatch for " + file_name + - "\nexpected hash: " + expected_hash + "\nreal hash: " + real_hash) + if self.using_frozen: + if real_hash != expected_hash and fail_on_tamper: + raise Exception("Model File Mismatch for " + file_name + + "\nexpected hash: " + expected_hash + "\nreal hash: " + real_hash) + else: + self.models[file_name] = FileData(real_hash, local_data_path) else: self.models[file_name] = FileData(real_hash, local_data_path) -def get_manifest(manifest_frozen_path, manifest_reg_path): - if os.path.exists(manifest_frozen_path): - with open(manifest_frozen_path) as f: - manifest_data = json.load(f) - if check_lock(manifest_data): + def get_manifest(self): + if os.path.exists(self.manifest_frozen_path): + with open(self.manifest_frozen_path) as f: + manifest_data = json.load(f) + if check_lock(manifest_data): + 
return manifest_data + else: + raise Exception("Manifest FreezeFile Tamper Detected; please use the CLI and 'algo freeze' to rebuild your " + "algorithm's freeze file.") + elif os.path.exists(self.manifest_reg_path): + with open(self.manifest_reg_path) as f: + manifest_data = json.load(f) + self.using_frozen = False return manifest_data else: - raise Exception("Manifest FreezeFile Tamper Detected; please use the CLI and 'algo freeze' to rebuild your " - "algorithm's freeze file.") - elif os.path.exists(manifest_reg_path): - with open(manifest_reg_path) as f: - manifest_data = json.load(f) - return manifest_data - else: - return None + return None def check_lock(manifest_data): From 35a17a3006962e80746d93244f434bde441a83fc Mon Sep 17 00:00:00 2001 From: James Sutton <1892175+zeryx@users.noreply.github.com> Date: Tue, 1 Mar 2022 12:36:22 -0400 Subject: [PATCH 04/20] ensured that init checks for any kind of payload; including "" or {} as input --- adk/ADK.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adk/ADK.py b/adk/ADK.py index 913ca19..10c0222 100644 --- a/adk/ADK.py +++ b/adk/ADK.py @@ -94,7 +94,7 @@ def process_local(self, local_payload, pprint): def init(self, local_payload=None, pprint=print): self.load() - if self.is_local and local_payload: + if self.is_local and local_payload is not None: if self.loading_exception: load_error = create_exception(self.loading_exception, loading_exception=True) self.write_to_pipe(load_error, pprint=pprint) From 09422cad464298f150068c3c7b00bfc6e859ccbb Mon Sep 17 00:00:00 2001 From: zeryx <1892175+zeryx@users.noreply.github.com> Date: Sat, 11 Jun 2022 21:30:20 -0300 Subject: [PATCH 05/20] initial testable commit --- adk/ADK.py | 23 ++++++++++++++- adk/mlops.py | 36 +++++++++++++++++++++++ adk/modeldata.py | 24 ++++++++++++--- tests/manifests/mlops_model_manifest.json | 9 ++++++ 4 files changed, 87 insertions(+), 5 deletions(-) create mode 100644 adk/mlops.py create mode 100644 
tests/manifests/mlops_model_manifest.json diff --git a/adk/ADK.py b/adk/ADK.py index 10c0222..5994767 100644 --- a/adk/ADK.py +++ b/adk/ADK.py @@ -3,6 +3,10 @@ import os import sys import Algorithmia +import yaml +import os +import subprocess + from adk.io import create_exception, format_data, format_response from adk.modeldata import ModelData @@ -92,7 +96,24 @@ def process_local(self, local_payload, pprint): result = self.apply(local_payload) self.write_to_pipe(result, pprint=pprint) - def init(self, local_payload=None, pprint=print): + def mlops_initialize(self): + os.environ["MLOPS_SPOOLER_TYPE"] = "FILESYSTEM" + os.environ["MLOPS_FILESYSTEM_DIRECTORY"] = self.mlops_spool_dir + with open(f'{agents_dir}/conf/mlops.agent.conf.yaml') as f: + documents = yaml.load(f, Loader=yaml.FullLoader) + documents['mlopsUrl'] = DATAROBOT_ENDPOINT + documents['apiToken'] = DATAROBOT_API_TOKEN + with open(f'{agents_dir}/conf/mlops.agent.conf.yaml', 'w') as f: + yaml.dump(documents, f) + subprocess.call(f'{agents_dir}/bin/start-agent.sh') + check = subprocess.Popen([f'{agents_dir}/bin/status-agent.sh'], stdout=subprocess.PIPE) + check.terminate() + + + + def init(self, local_payload=None, pprint=print, mlops=False): + if mlops and not self.is_local: + self.mlops_initialize() self.load() if self.is_local and local_payload is not None: if self.loading_exception: diff --git a/adk/mlops.py b/adk/mlops.py new file mode 100644 index 0000000..6ac1dc9 --- /dev/null +++ b/adk/mlops.py @@ -0,0 +1,36 @@ +import yaml +import os +import subprocess + + +class MLOps(Object): + def __init__(self, endpoint, api_token, model_id, deployment_id): + self.token = api_token + self.endpoint = endpoint + self.model_id = model_id + self.deployment_id = deployment_id + self.spool_dir = "/tmp/ta" + self.agent_dir = "/opt/mlops-agent/datarobot_mlops_package-8.1.2" + + def init(self): + with open(f'{self.agent_dir}/conf/mlops.agent.conf.yaml') as f: + documents = yaml.load(f, Loader=yaml.FullLoader) + 
documents['mlopsUrl'] = self.endpoint + documents['apiToken'] = self.token + with open(f'{agents_dir}/conf/mlops.agent.conf.yaml', 'w') as f: + yaml.dump(documents, f) + + subprocess.call(f'{agents_dir}/bin/start-agent.sh') + check = subprocess.Popen([f'{agents_dir}/bin/status-agent.sh'], stdout=subprocess.PIPE) + output = check.stdout.readlines() + check.terminate() + if "DataRobot MLOps-Agent is running as a service." in output: + return True + else: + return False + + def env_vars(self): + os.environ['MLOPS_DEPLOYMENT_ID'] = self.deployment_id + os.environ['MLOPS_MODEL_ID'] = self.model_id + os.environ['MLOPS_SPOOLER_TYPE'] = "FILESYSTEM" + os.environ['MLOPS_FILESYSTEM_DIRECTORY'] = "/tmp/ta" \ No newline at end of file diff --git a/adk/modeldata.py b/adk/modeldata.py index 0b6acab..6c832e2 100644 --- a/adk/modeldata.py +++ b/adk/modeldata.py @@ -2,10 +2,11 @@ import json import hashlib from adk.classes import FileData +from adk.mlops import MLOps class ModelData(object): - def __init__(self, client, model_manifest_path): + def __init__(self, client, model_manifest_path, mlops=False): self.manifest_reg_path = model_manifest_path self.manifest_frozen_path = "{}.freeze".format(self.manifest_reg_path) self.manifest_data = self.get_manifest() @@ -13,6 +14,7 @@ def __init__(self, client, model_manifest_path): self.models = {} self.usr_key = "__user__" self.using_frozen = True + self.use_mlops = mlops def __getitem__(self, key): return getattr(self, self.usr_key + key) @@ -38,6 +40,8 @@ def available(self): def initialize(self): if self.client is None: raise Exception("Client was not defined, please define a Client when using Model Manifests.") + if self.use_mlops: + self.mlops_init() for required_file in self.manifest_data['required_files']: name = required_file['name'] source_uri = required_file['source_uri'] @@ -88,7 +92,6 @@ def find_optional_model(self, file_name): else: self.models[file_name] = FileData(real_hash, local_data_path) - def get_manifest(self): if 
os.path.exists(self.manifest_frozen_path): with open(self.manifest_frozen_path) as f: @@ -96,8 +99,9 @@ def get_manifest(self): if check_lock(manifest_data): return manifest_data else: - raise Exception("Manifest FreezeFile Tamper Detected; please use the CLI and 'algo freeze' to rebuild your " - "algorithm's freeze file.") + raise Exception( + "Manifest FreezeFile Tamper Detected; please use the CLI and 'algo freeze' to rebuild your " + "algorithm's freeze file.") elif os.path.exists(self.manifest_reg_path): with open(self.manifest_reg_path) as f: manifest_data = json.load(f) @@ -106,6 +110,18 @@ def get_manifest(self): else: return None + def mlops_init(self): + mlops = self.manifest_data['mlops'] + model_id = mlops['model_id'] + deployment_id = mlops['deployment_id'] + datarobot_api_endpoint = mlops['datarobot_api_endpoint'] + + api_token = os.environ.get('DATAROBOT_MLOPS_API_TOKEN') + if api_token is None: + raise Exception("'DATAROBOT_MLOPS_API_TOKEN' environment variable not found.\nPlease ensure that you have a" + "valid API token and add it as a secret to this algorithm.") + self.mlops = MLOps(datarobot_api_endpoint, api_token, model_id, deployment_id) + def check_lock(manifest_data): expected_lock_checksum = manifest_data.get('lock_checksum') diff --git a/tests/manifests/mlops_model_manifest.json b/tests/manifests/mlops_model_manifest.json new file mode 100644 index 0000000..0d85f61 --- /dev/null +++ b/tests/manifests/mlops_model_manifest.json @@ -0,0 +1,9 @@ +{ + "mlops": { + "model_id": "", + "deployment_id": "", + "datarobot_api_endpoint": "https://app.datarobot.com/" + }, + "required_models": [], + "optional_models": [] +} \ No newline at end of file From 435e4fe5f6e2c3d6c57f158168d59de256f3e932 Mon Sep 17 00:00:00 2001 From: zeryx <1892175+zeryx@users.noreply.github.com> Date: Sun, 12 Jun 2022 01:24:11 -0300 Subject: [PATCH 06/20] functional, feature filled commit --- adk/ADK.py | 33 +++++++--------- adk/mlops.py | 46 +++++++++++++---------- 
adk/modeldata.py | 18 +-------- tests/manifests/mlops_model_manifest.json | 9 ----- 4 files changed, 41 insertions(+), 65 deletions(-) delete mode 100644 tests/manifests/mlops_model_manifest.json diff --git a/adk/ADK.py b/adk/ADK.py index 5994767..a3edd54 100644 --- a/adk/ADK.py +++ b/adk/ADK.py @@ -9,6 +9,7 @@ from adk.io import create_exception, format_data, format_response from adk.modeldata import ModelData +from adk.mlops import MLOps class ADK(object): @@ -21,6 +22,7 @@ def __init__(self, apply_func, load_func=None, client=None): :param client: A Algorithmia Client instance that might be user defined, and is used for interacting with a model manifest file; if defined. """ + self.mlops = None self.FIFO_PATH = "/tmp/algoout" if client: @@ -43,10 +45,8 @@ def __init__(self, apply_func, load_func=None, client=None): self.load_result = None self.loading_exception = None self.manifest_path = "model_manifest.json" - self.model_data = self.init_manifest(self.manifest_path) - - def init_manifest(self, path): - return ModelData(self.client, path) + self.mlops_path = "mlops.json" + self.model_data = ModelData(self.client, self.manifest_path) def load(self): try: @@ -95,25 +95,18 @@ def write_to_pipe(self, payload, pprint=print): def process_local(self, local_payload, pprint): result = self.apply(local_payload) self.write_to_pipe(result, pprint=pprint) - - def mlops_initialize(self): - os.environ["MLOPS_SPOOLER_TYPE"] = "FILESYSTEM" - os.environ["MLOPS_FILESYSTEM_DIRECTORY"] = self.mlops_spool_dir - with open(f'{agents_dir}/conf/mlops.agent.conf.yaml') as f: - documents = yaml.load(f, Loader=yaml.FullLoader) - documents['mlopsUrl'] = DATAROBOT_ENDPOINT - documents['apiToken'] = DATAROBOT_API_TOKEN - with open(f'{agents_dir}/conf/mlops.agent.conf.yaml', 'w') as f: - yaml.dump(documents, f) - subprocess.call(f'{agents_dir}/bin/start-agent.sh') - check = subprocess.Popen([f'{agents_dir}/bin/status-agent.sh'], stdout=subprocess.PIPE) - check.terminate() - - + + def 
mlops_init(self): + mlops_token = os.environ.get("DATAROBOT_MLOPS_API_TOKEN", None) + if mlops_token: + self.mlops = MLOps(mlops_token, self.mlops_path) + self.mlops.init() + else: + raise Exception("'DATAROBOT_MLOPS_API_TOKEN' was not found, please set to use mlops.") def init(self, local_payload=None, pprint=print, mlops=False): if mlops and not self.is_local: - self.mlops_initialize() + self.mlops_init() self.load() if self.is_local and local_payload is not None: if self.loading_exception: diff --git a/adk/mlops.py b/adk/mlops.py index 6ac1dc9..8b4fe19 100644 --- a/adk/mlops.py +++ b/adk/mlops.py @@ -1,36 +1,44 @@ import yaml +import json import os import subprocess -class MLOps(Object): - def __init__(self, endpoint, api_token, model_id, deployment_id): +class MLOps(object): + spool_dir = "/tmp/ta" + agent_dir = "/opt/mlops-agent/datarobot_mlops_package-8.1.2" + + def __init__(self, api_token, path): self.token = api_token - self.endpoint = endpoint - self.model_id = model_id - self.deployment_id = deployment_id - self.spool_dir = "/tmp/ta" - self.agent_dir = "/opt/mlops-agent/datarobot_mlops_package-8.1.2" + if os.path.exists(path): + with open(path) as f: + mlops_config = json.load(f) + else: + raise Exception("'mlops.json' file does not exist, but mlops was requested.") + if not os.path.exists(agent_dir): + raise Exception("environment is not configured for mlops.\nPlease select a valid mlops enabled environment.") + self.endpoint = mlops_config['datarobot_api_endpoint'] + self.model_id = mlops_config['model_id'] + self.deployment_id = mlops_config['deployment_id'] def init(self): + os.environ['MLOPS_DEPLOYMENT_ID'] = self.deployment_id + os.environ['MLOPS_MODEL_ID'] = self.model_id + os.environ['MLOPS_SPOOLER_TYPE'] = "FILESYSTEM" + os.environ['MLOPS_FILESYSTEM_DIRECTORY'] = "/tmp/ta" + with open(f'{self.agent_dir}/conf/mlops.agent.conf.yaml') as f: documents = yaml.load(f, Loader=yaml.FullLoader) documents['mlopsUrl'] = self.endpoint documents['apiToken'] 
= self.token - with open(f'{agents_dir}/conf/mlops.agent.conf.yaml', 'w') as f: + with open(f'{self.agent_dir}/conf/mlops.agent.conf.yaml', 'w') as f: yaml.dump(documents, f) - subprocess.call(f'{agents_dir}/bin/start-agent.sh') - check = subprocess.Popen([f'{agents_dir}/bin/status-agent.sh'], stdout=subprocess.PIPE) - output = check.stdout.readlines() + subprocess.call(f'{self.agent_dir}/bin/start-agent.sh') + check = subprocess.Popen([f'{self.agent_dir}/bin/status-agent.sh'], stdout=subprocess.PIPE) + output = check.stdout.readlines()[0] check.terminate() - if "DataRobot MLOps-Agent is running as a service." in output: + if b"DataRobot MLOps-Agent is running as a service." in output: return True else: - return False - - def env_vars(self): - os.environ['MLOPS_DEPLOYMENT_ID'] = self.deployment_id - os.environ['MLOPS_MODEL_ID'] = self.model_id - os.environ['MLOPS_SPOOLER_TYPE'] = "FILESYSTEM" - os.environ['MLOPS_FILESYSTEM_DIRECTORY'] = "/tmp/ta" \ No newline at end of file + raise Exception(output) \ No newline at end of file diff --git a/adk/modeldata.py b/adk/modeldata.py index 6c832e2..e2b49cd 100644 --- a/adk/modeldata.py +++ b/adk/modeldata.py @@ -2,11 +2,10 @@ import json import hashlib from adk.classes import FileData -from adk.mlops import MLOps class ModelData(object): - def __init__(self, client, model_manifest_path, mlops=False): + def __init__(self, client, model_manifest_path): self.manifest_reg_path = model_manifest_path self.manifest_frozen_path = "{}.freeze".format(self.manifest_reg_path) self.manifest_data = self.get_manifest() @@ -14,7 +13,6 @@ def __init__(self, client, model_manifest_path, mlops=False): self.models = {} self.usr_key = "__user__" self.using_frozen = True - self.use_mlops = mlops def __getitem__(self, key): return getattr(self, self.usr_key + key) @@ -40,8 +38,6 @@ def available(self): def initialize(self): if self.client is None: raise Exception("Client was not defined, please define a Client when using Model Manifests.") - if 
self.use_mlops: - self.mlops_init() for required_file in self.manifest_data['required_files']: name = required_file['name'] source_uri = required_file['source_uri'] @@ -110,18 +106,6 @@ def get_manifest(self): else: return None - def mlops_init(self): - mlops = self.manifest_data['mlops'] - model_id = mlops['model_id'] - deployment_id = mlops['deployment_id'] - datarobot_api_endpoint = mlops['datarobot_api_endpoint'] - - api_token = os.environ.get('DATAROBOT_MLOPS_API_TOKEN') - if api_token is None: - raise Exception("'DATAROBOT_MLOPS_API_TOKEN' environment variable not found.\nPlease ensure that you have a" - "valid API token and add it as a secret to this algorithm.") - self.mlops = MLOps(datarobot_api_endpoint, api_token, model_id, deployment_id) - def check_lock(manifest_data): expected_lock_checksum = manifest_data.get('lock_checksum') diff --git a/tests/manifests/mlops_model_manifest.json b/tests/manifests/mlops_model_manifest.json deleted file mode 100644 index 0d85f61..0000000 --- a/tests/manifests/mlops_model_manifest.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "mlops": { - "model_id": "", - "deployment_id": "", - "datarobot_api_endpoint": "https://app.datarobot.com/" - }, - "required_models": [], - "optional_models": [] -} \ No newline at end of file From e0ec40e949073ced1e436a458e5b6fa25b5322c0 Mon Sep 17 00:00:00 2001 From: zeryx <1892175+zeryx@users.noreply.github.com> Date: Sun, 12 Jun 2022 01:59:10 -0300 Subject: [PATCH 07/20] added yaml as dependency --- adk/ADK.py | 1 - requirements.txt | 3 ++- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/adk/ADK.py b/adk/ADK.py index a3edd54..718ef98 100644 --- a/adk/ADK.py +++ b/adk/ADK.py @@ -3,7 +3,6 @@ import os import sys import Algorithmia -import yaml import os import subprocess diff --git a/requirements.txt b/requirements.txt index ccb528b..8ba7957 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1,3 @@ algorithmia>=1.7,<2 -six \ No newline at end of file +six 
+pyaml==21.10 \ No newline at end of file From b32a1be13f0938aa34fcc249d48166802b65e412 Mon Sep 17 00:00:00 2001 From: zeryx <1892175+zeryx@users.noreply.github.com> Date: Sun, 12 Jun 2022 02:01:52 -0300 Subject: [PATCH 08/20] ugh version mgmt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 8ba7957..eda150c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ algorithmia>=1.7,<2 six -pyaml==21.10 \ No newline at end of file +pyaml>=21.10,<21.11 \ No newline at end of file From 485ef9f65f5959b732608b3587b6cf603bf71a20 Mon Sep 17 00:00:00 2001 From: zeryx <1892175+zeryx@users.noreply.github.com> Date: Sun, 12 Jun 2022 02:03:25 -0300 Subject: [PATCH 09/20] fix test shim --- tests/AdkTest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/AdkTest.py b/tests/AdkTest.py index e6b4672..941d848 100644 --- a/tests/AdkTest.py +++ b/tests/AdkTest.py @@ -1,7 +1,7 @@ from adk import ADK - +from adk.modeldata import ModelData class ADKTest(ADK): def __init__(self, apply_func, load_func=None, client=None, manifest_path="model_manifest.json.freeze"): super(ADKTest, self).__init__(apply_func, load_func, client) - self.model_data = self.init_manifest(manifest_path) + self.model_data = ModelData(self.client, manifest_path) From c1d82009375317c25eb60e5472e1724119917d5f Mon Sep 17 00:00:00 2001 From: zeryx <1892175+zeryx@users.noreply.github.com> Date: Wed, 15 Jun 2022 14:16:34 -0300 Subject: [PATCH 10/20] replaced wildcard with actual path, which can be overridden by the mlops.json file --- adk/mlops.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/adk/mlops.py b/adk/mlops.py index 8b4fe19..a64efbd 100644 --- a/adk/mlops.py +++ b/adk/mlops.py @@ -6,7 +6,8 @@ class MLOps(object): spool_dir = "/tmp/ta" - agent_dir = "/opt/mlops-agent/datarobot_mlops_package-8.1.2" + agent_dir = "/opt/mlops-agent" + mlops_dir_name = 
"datarobot_mlops_package-8.1.2" def __init__(self, api_token, path): self.token = api_token @@ -15,27 +16,28 @@ def __init__(self, api_token, path): mlops_config = json.load(f) else: raise Exception("'mlops.json' file does not exist, but mlops was requested.") - if not os.path.exists(agent_dir): + if not os.path.exists(self.agent_dir): raise Exception("environment is not configured for mlops.\nPlease select a valid mlops enabled environment.") - self.endpoint = mlops_config['datarobot_api_endpoint'] + self.endpoint = mlops_config['datarobot_mlops_service_url'] self.model_id = mlops_config['model_id'] self.deployment_id = mlops_config['deployment_id'] + self.mlops_name = mlops_config.get('mlops_dir_name', 'datarobot_mlops_package-8.1.2') def init(self): os.environ['MLOPS_DEPLOYMENT_ID'] = self.deployment_id os.environ['MLOPS_MODEL_ID'] = self.model_id os.environ['MLOPS_SPOOLER_TYPE'] = "FILESYSTEM" - os.environ['MLOPS_FILESYSTEM_DIRECTORY'] = "/tmp/ta" + os.environ['MLOPS_FILESYSTEM_DIRECTORY'] = self.spool_dir - with open(f'{self.agent_dir}/conf/mlops.agent.conf.yaml') as f: + with open(f'{self.agent_dir}/{self.mlops_dir_name}/conf/mlops.agent.conf.yaml') as f: documents = yaml.load(f, Loader=yaml.FullLoader) documents['mlopsUrl'] = self.endpoint documents['apiToken'] = self.token - with open(f'{self.agent_dir}/conf/mlops.agent.conf.yaml', 'w') as f: + with open(f'{self.agent_dir}/{self.mlops_dir_name}/conf/mlops.agent.conf.yaml', 'w') as f: yaml.dump(documents, f) - subprocess.call(f'{self.agent_dir}/bin/start-agent.sh') - check = subprocess.Popen([f'{self.agent_dir}/bin/status-agent.sh'], stdout=subprocess.PIPE) + subprocess.call(f'{self.agent_dir}/{self.mlops_dir_name}/bin/start-agent.sh') + check = subprocess.Popen([f'{self.agent_dir}/{self.mlops_dir_name}/bin/status-agent.sh'], stdout=subprocess.PIPE) output = check.stdout.readlines()[0] check.terminate() if b"DataRobot MLOps-Agent is running as a service." 
in output: From a44964b611c85ed161e8c4482edf07d29a4e667c Mon Sep 17 00:00:00 2001 From: zeryx <1892175+zeryx@users.noreply.github.com> Date: Wed, 15 Jun 2022 17:43:49 -0300 Subject: [PATCH 11/20] added pyaml to the dependencies --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index f470b4a..8d34878 100644 --- a/setup.py +++ b/setup.py @@ -14,6 +14,7 @@ author_email='support@algorithmia.com', packages=['adk'], install_requires=[ + 'pyaml>=21.10,<21.11', 'six', ], include_package_data=True, From 27b8c3a573c1e2e4d77aa597bca5ed79e24bc94b Mon Sep 17 00:00:00 2001 From: James Sutton <1892175+zeryx@users.noreply.github.com> Date: Fri, 24 Jun 2022 08:27:29 -0700 Subject: [PATCH 12/20] Update mlops.py --- adk/mlops.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/adk/mlops.py b/adk/mlops.py index a64efbd..b847a18 100644 --- a/adk/mlops.py +++ b/adk/mlops.py @@ -8,6 +8,7 @@ class MLOps(object): spool_dir = "/tmp/ta" agent_dir = "/opt/mlops-agent" mlops_dir_name = "datarobot_mlops_package-8.1.2" + total_dir_path = agent_dir + "/" + mlops_dir_name def __init__(self, api_token, path): self.token = api_token @@ -29,18 +30,18 @@ def init(self): os.environ['MLOPS_SPOOLER_TYPE'] = "FILESYSTEM" os.environ['MLOPS_FILESYSTEM_DIRECTORY'] = self.spool_dir - with open(f'{self.agent_dir}/{self.mlops_dir_name}/conf/mlops.agent.conf.yaml') as f: + with open(total_dir_path + '/conf/mlops.agent.conf.yaml') as f: documents = yaml.load(f, Loader=yaml.FullLoader) documents['mlopsUrl'] = self.endpoint documents['apiToken'] = self.token - with open(f'{self.agent_dir}/{self.mlops_dir_name}/conf/mlops.agent.conf.yaml', 'w') as f: + with open(total_dir_path + '/conf/mlops.agent.conf.yaml', 'w') as f: yaml.dump(documents, f) - subprocess.call(f'{self.agent_dir}/{self.mlops_dir_name}/bin/start-agent.sh') - check = subprocess.Popen([f'{self.agent_dir}/{self.mlops_dir_name}/bin/status-agent.sh'], stdout=subprocess.PIPE) + 
subprocess.call(total_dir_path + '/bin/start-agent.sh') + check = subprocess.Popen([total_dir_path + '/bin/status-agent.sh'], stdout=subprocess.PIPE) output = check.stdout.readlines()[0] check.terminate() if b"DataRobot MLOps-Agent is running as a service." in output: return True else: - raise Exception(output) \ No newline at end of file + raise Exception(output) From 295c61a5bdac918e57d886ba130167e18a4f8feb Mon Sep 17 00:00:00 2001 From: zeryx <1892175+zeryx@users.noreply.github.com> Date: Mon, 4 Jul 2022 16:08:38 -0300 Subject: [PATCH 13/20] updated readme template to have model manifest and mlops, added mlops example --- README_template.md | 40 +++++++++++++++++++++ examples/mlops_hello_world/mlops.json | 5 +++ examples/mlops_hello_world/requirements.txt | 4 +++ examples/mlops_hello_world/src/Algorithm.py | 32 +++++++++++++++++ examples/mlops_hello_world/src/__init__.py | 0 5 files changed, 81 insertions(+) create mode 100644 examples/mlops_hello_world/mlops.json create mode 100644 examples/mlops_hello_world/requirements.txt create mode 100644 examples/mlops_hello_world/src/Algorithm.py create mode 100644 examples/mlops_hello_world/src/__init__.py diff --git a/README_template.md b/README_template.md index a2e756d..ded0700 100644 --- a/README_template.md +++ b/README_template.md @@ -8,6 +8,8 @@ This document will describe the following: - What is an Algorithm Development Kit - Changes to Algorithm development - Example workflows you can use to create your own Algorithms. +- The Model Manifest System +- Datarobot MLOps integrations support ## What is an Algorithm Development Kit @@ -55,6 +57,44 @@ Check out these examples to help you get started: ```python ``` +## The Model Manifest System +Model Manifests are optional files that you can provide to your algorithm to easily +define important model files, their locations; and metadata - this file is called `model_manifest.json`. 
+ +```python +``` +With the Model Manifest system, you're also able to "freeze" your model_manifest.json, creating a model_manifest.json.freeze. +This file encodes the hash of the model file, preventing tampering once frozen - forver locking a version of your algorithm code with your model file. + +```python +``` + +As you can link to both hosted data collections, and AWS/GCP/Azure based block storage media, you're able to link your algorithm code with your model files, wherever they live today. + + +## Datarobot MLOps Integration +As part of the integration with Datarobot, we've built out integration support for the [DataRobot MLOps Agent](https://docs.datarobot.com/en/docs/mlops/deployment/mlops-agent/index.html) +By selecting `mlops=True` as part of the ADK `init()` function, the ADK will configure and setup the MLOps Agent to support writing content directly back to DataRobot. + + +For this, you'll need to select an MLOps Enabled Environment; and you will need to setup a DataRobot External Deployment. +Once setup, you will need to define your `mlops.json` file, including your deployment and model ids. + + +```python +``` + +Along with defining your `DATAROBOT_MLOPS_API_TOKEN` as a secret to your Algorithm, you're ready to start sending MLOps data back to DataRobot! 
+ + +```python +``` + +report_deployment_stats() + + + + ## Readme publishing To compile the template readme, please check out [embedme](https://github.com/zakhenry/embedme) utility and run the following: diff --git a/examples/mlops_hello_world/mlops.json b/examples/mlops_hello_world/mlops.json new file mode 100644 index 0000000..c2fbfae --- /dev/null +++ b/examples/mlops_hello_world/mlops.json @@ -0,0 +1,5 @@ +{ + "model_id": "YOUR_MODEL_ID", + "deployment_id": "YOUR_DEPLOYMENT_ID", + "datarobot_mlops_service_url": "https://app.datarobot.com/" +} \ No newline at end of file diff --git a/examples/mlops_hello_world/requirements.txt b/examples/mlops_hello_world/requirements.txt new file mode 100644 index 0000000..47f13bc --- /dev/null +++ b/examples/mlops_hello_world/requirements.txt @@ -0,0 +1,4 @@ +algorithmia>=1.0.0,<2.0 +datarobot-mlops==8.0.7 +pyaml==21.10.1 +pillow<9.0 \ No newline at end of file diff --git a/examples/mlops_hello_world/src/Algorithm.py b/examples/mlops_hello_world/src/Algorithm.py new file mode 100644 index 0000000..1eeb104 --- /dev/null +++ b/examples/mlops_hello_world/src/Algorithm.py @@ -0,0 +1,32 @@ +from Algorithmia import ADK +from time import time + +# API calls will begin at the apply() method, with the request body passed as 'input' +# For more details, see algorithmia.com/developers/algorithm-development/languages + +def load(state): + # Lets initialize the final components of the MLOps plugin and prepare it for sending info back to DataRobot. 
+ state['mlops'] = MLOps().init() + return state + +def apply(input, state): + t1 = time() + df = pd.DataFrame(columns=['id', 'values']) + df.loc[0] = ["abcd", 0.25] + df.loc[0][1] += input + association_ids = df.iloc[:, 0].tolist() + reporting_predictions = df.loc[0][1] + t2 = time() + # As we're only making 1 prediction, our reporting tool should show only 1 prediction being made + state['mlops'].report_deployment_stats(1, t2 - t1) + + # Report the predictions data: features, predictions, class_names + state['mlops'].report_predictions_data(features_df=df, + predictions=reporting_predictions, + association_ids=association_ids) + return reporting_predictions + + +algorithm = ADK(apply, load) +algorithm.init(0.25, mlops=True) + diff --git a/examples/mlops_hello_world/src/__init__.py b/examples/mlops_hello_world/src/__init__.py new file mode 100644 index 0000000..e69de29 From 5dc12aaf6428dbd8697073cc84b5edbe517201b0 Mon Sep 17 00:00:00 2001 From: zeryx <1892175+zeryx@users.noreply.github.com> Date: Mon, 4 Jul 2022 16:10:51 -0300 Subject: [PATCH 14/20] create the final readme --- README.md | 141 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) diff --git a/README.md b/README.md index 2ff4709..21381ec 100644 --- a/README.md +++ b/README.md @@ -27,6 +27,8 @@ This document will describe the following: - What is an Algorithm Development Kit - Changes to Algorithm development - Example workflows you can use to create your own Algorithms. +- The Model Manifest System +- Datarobot MLOps integrations support ## What is an Algorithm Development Kit @@ -209,6 +211,145 @@ algorithm.init({"data": "https://i.imgur.com/bXdORXl.jpeg"}) ``` +## The Model Manifest System +Model Manifests are optional files that you can provide to your algorithm to easily +define important model files, their locations; and metadata - this file is called `model_manifest.json`. 
+ +```python +{ + "required_files" : [ + { "name": "squeezenet", + "source_uri": "data://AlgorithmiaSE/image_cassification_demo/squeezenet1_1-f364aa15.pth", + "fail_on_tamper": true, + "metadata": { + "dataset_md5_checksum": "46a44d32d2c5c07f7f66324bef4c7266" + } + }, + { + "name": "labels", + "source_uri": "data://AlgorithmiaSE/image_cassification_demo/imagenet_class_index.json", + "fail_on_tamper": true, + "metadata": { + "dataset_md5_checksum": "46a44d32d2c5c07f7f66324bef4c7266" + } + } + ], + "optional_files": [ + { + "name": "mobilenet", + "source_uri": "data://AlgorithmiaSE/image_cassification_demo/mobilenet_v2-b0353104.pth", + "fail_on_tamper": false, + "metadata": { + "dataset_md5_checksum": "46a44d32d2c5c07f7f66324bef4c7266" + } + } + ] +} +``` +With the Model Manifest system, you're also able to "freeze" your model_manifest.json, creating a model_manifest.json.freeze. +This file encodes the hash of the model file, preventing tampering once frozen - forver locking a version of your algorithm code with your model file. 
+ +```python +{ + "required_files":[ + { + "name":"squeezenet", + "source_uri":"data://AlgorithmiaSE/image_cassification_demo/squeezenet1_1-f364aa15.pth", + "fail_on_tamper":true, + "metadata":{ + "dataset_md5_checksum":"46a44d32d2c5c07f7f66324bef4c7266" + }, + "md5_checksum":"46a44d32d2c5c07f7f66324bef4c7266" + }, + { + "name":"labels", + "source_uri":"data://AlgorithmiaSE/image_cassification_demo/imagenet_class_index.json", + "fail_on_tamper":true, + "metadata":{ + "dataset_md5_checksum":"46a44d32d2c5c07f7f66324bef4c7266" + }, + "md5_checksum":"c2c37ea517e94d9795004a39431a14cb" + } + ], + "optional_files":[ + { + "name":"mobilenet", + "source_uri":"data://AlgorithmiaSE/image_cassification_demo/mobilenet_v2-b0353104.pth", + "fail_on_tamper":false, + "metadata":{ + "dataset_md5_checksum":"46a44d32d2c5c07f7f66324bef4c7266" + } + } + ], + "timestamp":"1633450866.985464", + "lock_checksum":"24f5eca888d87661ca6fc08042e40cb7" +} +``` + +As you can link to both hosted data collections, and AWS/GCP/Azure based block storage media, you're able to link your algorithm code with your model files, wherever they live today. + + +## Datarobot MLOps Integration +As part of the integration with Datarobot, we've built out integration support for the [DataRobot MLOps Agent](https://docs.datarobot.com/en/docs/mlops/deployment/mlops-agent/index.html) +By selecting `mlops=True` as part of the ADK `init()` function, the ADK will configure and setup the MLOps Agent to support writing content directly back to DataRobot. + + +For this, you'll need to select an MLOps Enabled Environment; and you will need to setup a DataRobot External Deployment. +Once setup, you will need to define your `mlops.json` file, including your deployment and model ids. 
+ + +```python +{ + "model_id": "YOUR_MODEL_ID", + "deployment_id": "YOUR_DEPLOYMENT_ID", + "datarobot_mlops_service_url": "https://app.datarobot.com/" +} +``` + +Along with defining your `DATAROBOT_MLOPS_API_TOKEN` as a secret to your Algorithm, you're ready to start sending MLOps data back to DataRobot! + + +```python +from Algorithmia import ADK +from time import time + +# API calls will begin at the apply() method, with the request body passed as 'input' +# For more details, see algorithmia.com/developers/algorithm-development/languages + +def load(state): + # Lets initialize the final components of the MLOps plugin and prepare it for sending info back to DataRobot. + state['mlops'] = MLOps().init() + return state + +def apply(input, state): + t1 = time() + df = pd.DataFrame(columns=['id', 'values']) + df.loc[0] = ["abcd", 0.25] + df.loc[0][1] += input + association_ids = df.iloc[:, 0].tolist() + reporting_predictions = df.loc[0][1] + t2 = time() + # As we're only making 1 prediction, our reporting tool should show only 1 prediction being made + state['mlops'].report_deployment_stats(1, t2 - t1) + + # Report the predictions data: features, predictions, class_names + state['mlops'].report_predictions_data(features_df=df, + predictions=reporting_predictions, + association_ids=association_ids) + return reporting_predictions + + +algorithm = ADK(apply, load) +algorithm.init(0.25, mlops=True) + + +``` + +report_deployment_stats() + + + + ## Readme publishing To compile the template readme, please check out [embedme](https://github.com/zakhenry/embedme) utility and run the following: From 201ee6db059983261298cb5301a118023fc6baa2 Mon Sep 17 00:00:00 2001 From: zeryx <1892175+zeryx@users.noreply.github.com> Date: Mon, 4 Jul 2022 16:15:40 -0300 Subject: [PATCH 15/20] replaced python with json highlighting --- README.md | 9 +++------ README_template.md | 9 +++------ 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index 
21381ec..5c86caf 100644 --- a/README.md +++ b/README.md @@ -215,7 +215,7 @@ algorithm.init({"data": "https://i.imgur.com/bXdORXl.jpeg"}) Model Manifests are optional files that you can provide to your algorithm to easily define important model files, their locations; and metadata - this file is called `model_manifest.json`. -```python +```json { "required_files" : [ { "name": "squeezenet", @@ -249,7 +249,7 @@ define important model files, their locations; and metadata - this file is calle With the Model Manifest system, you're also able to "freeze" your model_manifest.json, creating a model_manifest.json.freeze. This file encodes the hash of the model file, preventing tampering once frozen - forver locking a version of your algorithm code with your model file. -```python +```json { "required_files":[ { @@ -298,7 +298,7 @@ For this, you'll need to select an MLOps Enabled Environment; and you will need Once setup, you will need to define your `mlops.json` file, including your deployment and model ids. -```python +```json { "model_id": "YOUR_MODEL_ID", "deployment_id": "YOUR_DEPLOYMENT_ID", @@ -345,9 +345,6 @@ algorithm.init(0.25, mlops=True) ``` -report_deployment_stats() - - ## Readme publishing diff --git a/README_template.md b/README_template.md index ded0700..3c47c6a 100644 --- a/README_template.md +++ b/README_template.md @@ -61,12 +61,12 @@ Check out these examples to help you get started: Model Manifests are optional files that you can provide to your algorithm to easily define important model files, their locations; and metadata - this file is called `model_manifest.json`. -```python +```json ``` With the Model Manifest system, you're also able to "freeze" your model_manifest.json, creating a model_manifest.json.freeze. This file encodes the hash of the model file, preventing tampering once frozen - forver locking a version of your algorithm code with your model file. 
-```python +```json ``` As you can link to both hosted data collections, and AWS/GCP/Azure based block storage media, you're able to link your algorithm code with your model files, wherever they live today. @@ -81,7 +81,7 @@ For this, you'll need to select an MLOps Enabled Environment; and you will need Once setup, you will need to define your `mlops.json` file, including your deployment and model ids. -```python +```json ``` Along with defining your `DATAROBOT_MLOPS_API_TOKEN` as a secret to your Algorithm, you're ready to start sending MLOps data back to DataRobot! @@ -90,9 +90,6 @@ Along with defining your `DATAROBOT_MLOPS_API_TOKEN` as a secret to your Algorit ```python ``` -report_deployment_stats() - - ## Readme publishing From c0f47d1048f761551e49b9c518fca39cef888dbf Mon Sep 17 00:00:00 2001 From: James Sutton <1892175+zeryx@users.noreply.github.com> Date: Thu, 14 Jul 2022 10:14:59 -0700 Subject: [PATCH 16/20] added self ref to total_dir_path --- adk/mlops.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/adk/mlops.py b/adk/mlops.py index b847a18..cbde73a 100644 --- a/adk/mlops.py +++ b/adk/mlops.py @@ -30,15 +30,15 @@ def init(self): os.environ['MLOPS_SPOOLER_TYPE'] = "FILESYSTEM" os.environ['MLOPS_FILESYSTEM_DIRECTORY'] = self.spool_dir - with open(total_dir_path + '/conf/mlops.agent.conf.yaml') as f: + with open(self.total_dir_path + '/conf/mlops.agent.conf.yaml') as f: documents = yaml.load(f, Loader=yaml.FullLoader) documents['mlopsUrl'] = self.endpoint documents['apiToken'] = self.token - with open(total_dir_path + '/conf/mlops.agent.conf.yaml', 'w') as f: + with open(self.total_dir_path + '/conf/mlops.agent.conf.yaml', 'w') as f: yaml.dump(documents, f) - subprocess.call(total_dir_path + '/bin/start-agent.sh') - check = subprocess.Popen([total_dir_path + '/bin/status-agent.sh'], stdout=subprocess.PIPE) + subprocess.call(self.total_dir_path + '/bin/start-agent.sh') + check = subprocess.Popen([self.total_dir_path + 
'/bin/status-agent.sh'], stdout=subprocess.PIPE) output = check.stdout.readlines()[0] check.terminate() if b"DataRobot MLOps-Agent is running as a service." in output: From 350a4eaf05dc5ebfcb806d05c6bbc0b3055982c0 Mon Sep 17 00:00:00 2001 From: zeryx <1892175+zeryx@users.noreply.github.com> Date: Fri, 22 Jul 2022 11:29:31 -0300 Subject: [PATCH 17/20] enable safe replacement of mlops.json by setting environment variables --- adk/mlops.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/adk/mlops.py b/adk/mlops.py index cbde73a..34d44e9 100644 --- a/adk/mlops.py +++ b/adk/mlops.py @@ -15,14 +15,18 @@ def __init__(self, api_token, path): if os.path.exists(path): with open(path) as f: mlops_config = json.load(f) - else: - raise Exception("'mlops.json' file does not exist, but mlops was requested.") + self.endpoint = mlops_config['datarobot_mlops_service_url'] + self.model_id = mlops_config['model_id'] + self.deployment_id = mlops_config['deployment_id'] + self.mlops_name = mlops_config.get('mlops_dir_name', 'datarobot_mlops_package-8.1.2') + if "MLOPS_SERVICE_URL" in os.environ: + self.endpoint = os.environ['MLOPS_SERVICE_URL'] + if "MODEL_ID" in os.environ: + self.model_id = os.environ['MODEL_ID'] + if "DEPLOYMENT_ID" in os.environ: + self.deployment_id = os.environ['DEPLOYMENT_ID'] if not os.path.exists(self.agent_dir): raise Exception("environment is not configured for mlops.\nPlease select a valid mlops enabled environment.") - self.endpoint = mlops_config['datarobot_mlops_service_url'] - self.model_id = mlops_config['model_id'] - self.deployment_id = mlops_config['deployment_id'] - self.mlops_name = mlops_config.get('mlops_dir_name', 'datarobot_mlops_package-8.1.2') def init(self): os.environ['MLOPS_DEPLOYMENT_ID'] = self.deployment_id From 7b145070503de84dedf09a6416c8d0c6b6ff16d9 Mon Sep 17 00:00:00 2001 From: zeryx <1892175+zeryx@users.noreply.github.com> Date: Tue, 26 Jul 2022 05:47:32 -0300 Subject: [PATCH 18/20] added better 
error checking --- adk/mlops.py | 10 +++++++++ adk/modeldata.py | 55 ++++++++++++++++++++++++++---------------------- 2 files changed, 40 insertions(+), 25 deletions(-) diff --git a/adk/mlops.py b/adk/mlops.py index 34d44e9..cd335a7 100644 --- a/adk/mlops.py +++ b/adk/mlops.py @@ -28,6 +28,16 @@ def __init__(self, api_token, path): if not os.path.exists(self.agent_dir): raise Exception("environment is not configured for mlops.\nPlease select a valid mlops enabled environment.") + if self.endpoint is None: + raise Exception("'no endpoint found, please add 'MLOPS_SERVICE_URL' environment variable, or create an " + "mlops.json file") + if self.model_id is None: + raise Exception("no model_id found, please add 'MODEL_ID' environment variable, or create an mlops.json " + "file") + if self.deployment_id is None: + raise Exception("no deployment_id found, please add 'DEPLOYMENT_ID' environment variable, or create an " + "mlops.json file") + def init(self): os.environ['MLOPS_DEPLOYMENT_ID'] = self.deployment_id os.environ['MLOPS_MODEL_ID'] = self.model_id diff --git a/adk/modeldata.py b/adk/modeldata.py index e2b49cd..893d28b 100644 --- a/adk/modeldata.py +++ b/adk/modeldata.py @@ -56,37 +56,42 @@ def initialize(self): self.models[name] = FileData(real_hash, local_data_path) def get_model(self, model_name): - if model_name in self.models: - return self.models[model_name].file_path - elif len([optional for optional in self.manifest_data['optional_files'] if - optional['name'] == model_name]) > 0: - self.find_optional_model(model_name) - return self.models[model_name].file_path + if self.available(): + if model_name in self.models: + return self.models[model_name].file_path + elif len([optional for optional in self.manifest_data['optional_files'] if + optional['name'] == model_name]) > 0: + self.find_optional_model(model_name) + return self.models[model_name].file_path + else: + raise Exception("model name " + model_name + " not found in manifest") else: - raise 
Exception("model name " + model_name + " not found in manifest") + raise Exception("unable to get model {}, model_manifest.json not found.".format(model_name)) def find_optional_model(self, file_name): - - found_models = [optional for optional in self.manifest_data['optional_files'] if - optional['name'] == file_name] - if len(found_models) == 0: - raise Exception("file with name '" + file_name + "' not found in model manifest.") - model_info = found_models[0] - self.models[file_name] = {} - source_uri = model_info['source_uri'] - fail_on_tamper = model_info.get("fail_on_tamper", False) - expected_hash = model_info.get('md5_checksum', None) - with self.client.file(source_uri).getFile() as f: - local_data_path = f.name - real_hash = md5_for_file(local_data_path) - if self.using_frozen: - if real_hash != expected_hash and fail_on_tamper: - raise Exception("Model File Mismatch for " + file_name + - "\nexpected hash: " + expected_hash + "\nreal hash: " + real_hash) + if self.available(): + found_models = [optional for optional in self.manifest_data['optional_files'] if + optional['name'] == file_name] + if len(found_models) == 0: + raise Exception("file with name '" + file_name + "' not found in model manifest.") + model_info = found_models[0] + self.models[file_name] = {} + source_uri = model_info['source_uri'] + fail_on_tamper = model_info.get("fail_on_tamper", False) + expected_hash = model_info.get('md5_checksum', None) + with self.client.file(source_uri).getFile() as f: + local_data_path = f.name + real_hash = md5_for_file(local_data_path) + if self.using_frozen: + if real_hash != expected_hash and fail_on_tamper: + raise Exception("Model File Mismatch for " + file_name + + "\nexpected hash: " + expected_hash + "\nreal hash: " + real_hash) + else: + self.models[file_name] = FileData(real_hash, local_data_path) else: self.models[file_name] = FileData(real_hash, local_data_path) else: - self.models[file_name] = FileData(real_hash, local_data_path) + raise 
Exception("unable to get model {}, model_manifest.json not found.".format(file_name)) def get_manifest(self): if os.path.exists(self.manifest_frozen_path): From cb18c18cdeddde66ae72d8f5bcd542460fdb3d6a Mon Sep 17 00:00:00 2001 From: zeryx <1892175+zeryx@users.noreply.github.com> Date: Tue, 26 Jul 2022 06:12:01 -0300 Subject: [PATCH 19/20] added better stack trace processing for both load time and runtime failures --- adk/io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/adk/io.py b/adk/io.py index be2045c..ccaabfc 100644 --- a/adk/io.py +++ b/adk/io.py @@ -57,7 +57,7 @@ def create_exception(exception, loading_exception=False): response = json.dumps({ "error": { "message": str(exception), - "stacktrace": traceback.format_exc(), + "stacktrace": " ".join(traceback.format_exception(type(exception), exception, exception.__traceback__)), "error_type": error_type, } }) From 4c6b718d738f4ec6035e53712d96eaf8da3aca08 Mon Sep 17 00:00:00 2001 From: zeryx <1892175+zeryx@users.noreply.github.com> Date: Tue, 26 Jul 2022 06:37:59 -0300 Subject: [PATCH 20/20] updated test case as we're now actually using stacktraces!
--- tests/test_adk_local.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_adk_local.py b/tests/test_adk_local.py index 4b5585c..d4ed78a 100644 --- a/tests/test_adk_local.py +++ b/tests/test_adk_local.py @@ -135,17 +135,18 @@ def test_manifest_file_success(self): self.assertEqual(expected_output, actual_output) def test_manifest_file_tampered(self): - input = "Algorithmia" + input = 'Algorithmia' expected_output = {"error": {"error_type": "LoadingError", "message": "Model File Mismatch for squeezenet\n" "expected hash: f20b50b44fdef367a225d41f747a0963\n" "real hash: 46a44d32d2c5c07f7f66324bef4c7266", - "stacktrace": "NoneType: None\n"}} + "stacktrace": ''}} actual_output = json.loads(self.execute_manifest_example(input, apply_successful_manifest_parsing, loading_with_manifest, manifest_path="tests/manifests/bad_model_manifest" ".json")) + actual_output['error']['stacktrace'] = '' self.assertEqual(expected_output, actual_output)