asos-danielbunting
diff --git a/‎examples/bls_decoupled/async_client.py‎
Lines changed: 4 additions & 3 deletions b/‎examples/bls_decoupled/async_client.py‎
Lines changed: 4 additions & 3 deletions
diff --git a/‎examples/bls_decoupled/async_model.py‎
Lines changed: 1 addition & 2 deletions b/‎examples/bls_decoupled/async_model.py‎
Lines changed: 1 addition & 2 deletions
diff --git a/‎examples/bls_decoupled/sync_model.py‎
Lines changed: 2 additions & 2 deletions b/‎examples/bls_decoupled/sync_model.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎examples/custom_metrics/client.py‎
Lines changed: 10 additions & 3 deletions b/‎examples/custom_metrics/client.py‎
Lines changed: 10 additions & 3 deletions
diff --git a/‎examples/custom_metrics/model.py‎
Lines changed: 1 addition & 1 deletion b/‎examples/custom_metrics/model.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎examples/instance_kind/model.py‎
Lines changed: 2 additions & 2 deletions b/‎examples/instance_kind/model.py‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎inferentia/scripts/gen_triton_model.py‎
Lines changed: 13 additions & 8 deletions b/‎inferentia/scripts/gen_triton_model.py‎
Lines changed: 13 additions & 8 deletions
diff --git a/‎src/gpu_buffers.cc‎
Lines changed: 1 addition & 0 deletions b/‎src/gpu_buffers.cc‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/infer_payload.h‎
Lines changed: 1 addition & 0 deletions b/‎src/infer_payload.h‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/infer_request.h‎
Lines changed: 1 addition & 0 deletions b/‎src/infer_request.h‎
Lines changed: 1 addition & 0 deletions
@@ -53,12 +53,13 @@
         # output_data contains two times of the square value of the input value.
         output_data = response.as_numpy("SUM")
         print("==========model result==========")
-        print("Two times the square value of {} is {}\n".format(input_data, output_data))
+        print("Two times the square value of {} is {}\n".format(
+            input_data, output_data))
 
-        if not np.allclose((2*input_data*input_data), output_data):
+        if not np.allclose((2 * input_data * input_data), output_data):
             print(
                 "BLS Decoupled Async example error: incorrect output value. Expected {}, got {}."
-            .format((2*input_data*input_data), output_data))
+                .format((2 * input_data * input_data), output_data))
             sys.exit(1)
 
     print('PASS: BLS Decoupled Async')
 
@@ -126,8 +126,7 @@ async def execute(self, requests):
         # Wait for all the inference requests to finish. The execution
         # of the Python script will be blocked until all the awaitables
         # are resolved.
-        async_responses = await asyncio.gather(
-            *inference_response_awaits)
+        async_responses = await asyncio.gather(*inference_response_awaits)
 
         # The variable that will store the sum of the responses.
         response_sum = np.array([0])
 
@@ -126,8 +126,8 @@ def execute(self, requests):
 
             # Check for the last empty response.
             if len(infer_response.output_tensors()) > 0:
-              response_sum += pb_utils.get_output_tensor_by_name(
-                  infer_response, "OUT").as_numpy()
+                response_sum += pb_utils.get_output_tensor_by_name(
+                    infer_response, "OUT").as_numpy()
 
         response = [
             pb_utils.InferenceResponse(
 
@@ -34,12 +34,14 @@
 model_name = "custom_metrics"
 shape = [4]
 
+
 def get_metrics():
     metrics_url = "http://localhost:8002/metrics"
     r = requests.get(metrics_url)
     r.raise_for_status()
     return r.text
 
+
 with httpclient.InferenceServerClient("localhost:8000") as client:
     input0_data = np.random.rand(*shape).astype(np.float32)
     input1_data = np.random.rand(*shape).astype(np.float32)
@@ -78,13 +80,18 @@ def get_metrics():
     patterns = [
         '# HELP requests_process_latency_ns Cumulative time spent processing requests',
         '# TYPE requests_process_latency_ns counter',
-        'requests_process_latency_ns{model="custom_metrics",version="1"}']
+        'requests_process_latency_ns{model="custom_metrics",version="1"}'
+    ]
     for pattern in patterns:
         if pattern not in metrics:
-            print("custom_metrics example error: missing pattern '{}' in metrics".format(pattern))
+            print(
+                "custom_metrics example error: missing pattern '{}' in metrics".
+                format(pattern))
             sys.exit(1)
         else:
-            print("custom_metrics example: found pattern '{}' in metrics".format(pattern))
+            print(
+                "custom_metrics example: found pattern '{}' in metrics".format(
+                    pattern))
 
     print('PASS: custom_metrics')
     sys.exit(0)
@@ -80,7 +80,7 @@ def initialize(self, args):
         self.metric_family = pb_utils.MetricFamily(
             name="requests_process_latency_ns",
             description="Cumulative time spent processing requests",
-            kind=pb_utils.MetricFamily.COUNTER # or pb_utils.MetricFamily.GAUGE
+            kind=pb_utils.MetricFamily.COUNTER  # or pb_utils.MetricFamily.GAUGE
         )
 
         # Create a Metric object under the MetricFamily object. The 'labels'
 
@@ -45,9 +45,9 @@ def initialize(self, args):
         the default device of the framework.
         """
         self.device = 'cuda' if args["model_instance_kind"] == "GPU" else 'cpu'
-        # This example is configured to work with torch=1.13 
+        # This example is configured to work with torch=1.13
         # and torchvision=0.14. Thus, we need to provide a proper tag `0.14.1`
-        # to make sure loaded Resnet50 is compatible with 
+        # to make sure loaded Resnet50 is compatible with
         # installed `torchvision`.
         # Refer to README for installation instructions.
         self.model = torch.hub.load("pytorch/vision:v0.14.1",
 
@@ -606,7 +606,8 @@ def finalize(self):
 
 
 def get_triton_python_model_impl(using_tensorflow_model,
-                                 disable_batch_requests_to_neuron, is_inf2=False):
+                                 disable_batch_requests_to_neuron,
+                                 is_inf2=False):
     triton_pmi = '''
 class TritonPythonModel:
     """Your Python model must use the same class name. Every Python model
@@ -627,7 +628,9 @@ class TritonPythonModel:
     return triton_pmi
 
 
-def create_model_file(using_tensorflow_model, disable_batch_requests_to_neuron, is_inf2=False):
+def create_model_file(using_tensorflow_model,
+                      disable_batch_requests_to_neuron,
+                      is_inf2=False):
     triton_model = get_model_license()
     triton_model += '''
 import json
@@ -661,12 +664,14 @@ def create_model_file(using_tensorflow_model, disable_batch_requests_to_neuron,
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument('--inf2',
-                        required=False,
-                        default=False,
-                        action='store_true',
-                        help="Specify whether the model should be generate for inf2 or inf1, default is inf1"
-                        )
+    parser.add_argument(
+        '--inf2',
+        required=False,
+        default=False,
+        action='store_true',
+        help=
+        "Specify whether the model should be generate for inf2 or inf1, default is inf1"
+    )
     parser.add_argument('--model_type',
                         type=str,
                         required=True,
 
@@ -25,6 +25,7 @@
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include "gpu_buffers.h"
+
 #include "pb_string.h"
 
 namespace triton { namespace backend { namespace python {
 
@@ -28,6 +28,7 @@
 
 #include <functional>
 #include <queue>
+
 #include "infer_response.h"
 #include "pb_preferred_memory.h"
 
 
@@ -28,6 +28,7 @@
 
 #include <future>
 #include <string>
+
 #include "infer_response.h"
 #include "pb_preferred_memory.h"
 #include "pb_tensor.h"
Original file line number	Diff line number	Diff line change
`@@ -80,7 +80,7 @@ def initialize(self, args):`
`80`	`80`	`self.metric_family = pb_utils.MetricFamily(`
`81`	`81`	`name="requests_process_latency_ns",`
`82`	`82`	`description="Cumulative time spent processing requests",`
`83`		`- kind=pb_utils.MetricFamily.COUNTER # or pb_utils.MetricFamily.GAUGE`
	`83`	`+ kind=pb_utils.MetricFamily.COUNTER  # or pb_utils.MetricFamily.GAUGE`
`84`	`84`	`)`
`85`	`85`
`86`	`86`	`# Create a Metric object under the MetricFamily object. The 'labels'`