From 7db78aced99bab32aae0c21da0d841d4c68094d5 Mon Sep 17 00:00:00 2001
From: Boyuan Deng
Date: Fri, 29 Apr 2022 12:48:07 -0700
Subject: [PATCH 1/3] load the filepath in convertToClient method

---
 mltrace/utils.py | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/mltrace/utils.py b/mltrace/utils.py
index 265f30b..f2b12bd 100644
--- a/mltrace/utils.py
+++ b/mltrace/utils.py
@@ -1,4 +1,5 @@
 from mltrace.db import Store
+from mltrace.db.utils import _load
 from mltrace.entities import IOPointer, ComponentRun
 
 import copy
@@ -48,14 +49,19 @@ def set_address(address: str):
 def convertToClient(componentRuns: typing.List):
     component_runs = []
     for cr in componentRuns:
-        inputs = [
-            IOPointer.from_dictionary(iop.__dict__).to_dictionary()
-            for iop in cr.inputs
-        ]
-        outputs = [
-            IOPointer.from_dictionary(iop.__dict__).to_dictionary()
-            for iop in cr.outputs
-        ]
+
+        inputs = []
+        for iop in cr.inputs:
+            iop_dict = IOPointer.from_dictionary(iop.__dict__).to_dictionary()
+            iop_dict['value'] = _load(iop_dict['name'])
+            inputs.append(iop_dict)
+
+        outputs = []
+        for iop in cr.outputs:
+            iop_dict = IOPointer.from_dictionary(iop.__dict__).to_dictionary()
+            iop_dict['value'] = _load(iop_dict['name'])
+            outputs.append(iop_dict)
+
         dependencies = [dep.component_name for dep in cr.dependencies]
         d = copy.deepcopy(cr.__dict__)
         d.update(

From f90fc2287765832f80064b80e722564b6450395c Mon Sep 17 00:00:00 2001
From: Boyuan Deng
Date: Fri, 29 Apr 2022 17:07:31 -0700
Subject: [PATCH 2/3] progress

---
 mltrace/entities/io_pointer.py | 8 +++++++-
 mltrace/utils.py               | 4 ++--
 2 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/mltrace/entities/io_pointer.py b/mltrace/entities/io_pointer.py
index ac69c46..e33461f 100644
--- a/mltrace/entities/io_pointer.py
+++ b/mltrace/entities/io_pointer.py
@@ -9,19 +9,25 @@ class IOPointer(Base):
     def __init__(
         self,
         name: str,
+        original_content: typing.Any = "",
         value: typing.Any = "",
         pointer_type: PointerTypeEnum = PointerTypeEnum.UNKNOWN,
         flag: bool = False,
     ):
         self._name = name
-        self._value = value
+        self._original_content = original_content
         self._pointer_type = pointer_type
         self._flag = flag
+        self._value = value
 
     @property
     def name(self) -> str:
         return self._name
 
+    @property
+    def original_content(self) -> typing.Any:
+        return self._original_content
+
     @property
     def value(self) -> typing.Any:
         return self._value
diff --git a/mltrace/utils.py b/mltrace/utils.py
index f2b12bd..8daa242 100644
--- a/mltrace/utils.py
+++ b/mltrace/utils.py
@@ -53,13 +53,13 @@ def convertToClient(componentRuns: typing.List):
         inputs = []
         for iop in cr.inputs:
             iop_dict = IOPointer.from_dictionary(iop.__dict__).to_dictionary()
-            iop_dict['value'] = _load(iop_dict['name'])
+            iop_dict['original_content'] = _load(iop_dict['name'])
             inputs.append(iop_dict)
 
         outputs = []
         for iop in cr.outputs:
             iop_dict = IOPointer.from_dictionary(iop.__dict__).to_dictionary()
-            iop_dict['value'] = _load(iop_dict['name'])
+            iop_dict['original_content'] = _load(iop_dict['name'])
             outputs.append(iop_dict)
 
         dependencies = [dep.component_name for dep in cr.dependencies]

From 286884f7840b424b04c36a02e8e1455d7643daaa Mon Sep 17 00:00:00 2001
From: Boyuan Deng
Date: Wed, 18 May 2022 00:17:02 -0700
Subject: [PATCH 3/3] convert input iopointer value into client facing format and added test

---
Review notes (text in this area is not part of the commit message):
 * __repr__ now reads the 'original_content' key, which is the key
   convertToClient writes in patch 2/3 and the key the example script
   reads. The earlier 'original-content' spelling is never set anywhere
   in this series, so the lookup could not succeed.
 * to_json() is only called when the stored object provides it, because
   IOPointer.__init__ (patch 2/3) defaults original_content to "" and a
   plain string has no to_json().
 * The example's doubleScore returns df_copy (the frame it just built)
   instead of the untouched input df.
 * Added/removed line counts are unchanged, so the hunk headers and the
   diffstat still hold; the blob ids on the "index" lines below predate
   these edits.

 examples/io_pointer_load_test.py  | 35 +++++++++++++++++++++++++++++++
 mltrace/entities/component_run.py |  4 ++++
 2 files changed, 39 insertions(+)
 create mode 100644 examples/io_pointer_load_test.py

diff --git a/examples/io_pointer_load_test.py b/examples/io_pointer_load_test.py
new file mode 100644
index 0000000..3f555f8
--- /dev/null
+++ b/examples/io_pointer_load_test.py
@@ -0,0 +1,35 @@
+import pandas as pd
+from mltrace import Component
+
+# sample dataframe
+students = pd.DataFrame({'Name': ['Rohan', 'Rahul', 'Gaurav',
+                                  'Ananya', 'Vinay', 'Rohan',
+                                  'Vivek', 'Vinay'],
+
+                         'Score': [76, 69, 70, 88, 79, 64, 62, 57]})
+
+# function to apply to dataframe
+
+
+def double(a):
+    return 2*a
+
+
+new_component = Component("test_io_pointer_load_component", "boyuan")
+
+
+@new_component.run(auto_log=True)
+def doubleScore(df):
+    df_copy = pd.DataFrame.copy(df)
+    df_copy['Score'] = df_copy['Score'].apply(double)
+    return df_copy
+
+
+doubleScore(students)
+
+num_component_run = len(new_component.history)
+last_component_run = new_component.history.get_runs_by_index(
+    -1, num_component_run)
+
+# expect no difference
+print(last_component_run[0].inputs[0]['original_content'].compare(students))
diff --git a/mltrace/entities/component_run.py b/mltrace/entities/component_run.py
index fe9c27e..834ef28 100644
--- a/mltrace/entities/component_run.py
+++ b/mltrace/entities/component_run.py
@@ -266,7 +266,11 @@ def __repr__(self):
         for inp in params["inputs"]:
             if 'value' in inp.keys():
                 del inp['value']
+            if hasattr(inp.get('original_content'), 'to_json'):
+                inp['original_content'] = inp['original_content'].to_json()
         for out in params["outputs"]:
             if 'value' in out.keys():
                 del out["value"]
+            if hasattr(out.get('original_content'), 'to_json'):
+                out['original_content'] = out['original_content'].to_json()
         return json.dumps(params)