From 9412729ca9968d05d12051b8e8542e2504ba2f6c Mon Sep 17 00:00:00 2001 From: Steven Date: Sun, 19 Jun 2022 01:16:50 -0700 Subject: [PATCH 01/77] Fix Conv2dTranspose bias Conv2dTranspose defaults to have use_bias = true but currently throws a not implemented exception when the parameter is true. --- src/TensorFlowNET.Keras/Layers/LayersApi.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TensorFlowNET.Keras/Layers/LayersApi.cs b/src/TensorFlowNET.Keras/Layers/LayersApi.cs index aa4f416f6..548e3ff95 100644 --- a/src/TensorFlowNET.Keras/Layers/LayersApi.cs +++ b/src/TensorFlowNET.Keras/Layers/LayersApi.cs @@ -235,7 +235,7 @@ public Conv2DTranspose Conv2DTranspose(int filters, string data_format = null, Shape dilation_rate = null, string activation = null, - bool use_bias = true, + bool use_bias = false, string kernel_initializer = null, string bias_initializer = null, string kernel_regularizer = null, From aac52940ade5c788bc7d8d6949da718b63293dc1 Mon Sep 17 00:00:00 2001 From: lingbai-kong Date: Fri, 23 Jun 2023 13:17:46 +0800 Subject: [PATCH 02/77] init pickle support to np.load object type of npy --- .../NumPy/DtypeConstructor.cs | 40 ++++++++++++ .../Implementation/NumPyImpl.Creation.cs | 18 +++++- .../NumPy/Implementation/NumPyImpl.load.cs | 22 +++++-- .../NumPy/MultiArrayConstructor.cs | 44 +++++++++++++ .../NumPy/NDArray.Pickle.cs | 19 ++++++ .../Tensorflow.Binding.csproj | 1 + src/TensorFlowNET.Keras/Datasets/Imdb.cs | 63 +++++++++++++++++-- .../Dataset/DatasetTest.cs | 17 +++++ 8 files changed, 215 insertions(+), 9 deletions(-) create mode 100644 src/TensorFlowNET.Core/NumPy/DtypeConstructor.cs create mode 100644 src/TensorFlowNET.Core/NumPy/MultiArrayConstructor.cs create mode 100644 src/TensorFlowNET.Core/NumPy/NDArray.Pickle.cs diff --git a/src/TensorFlowNET.Core/NumPy/DtypeConstructor.cs b/src/TensorFlowNET.Core/NumPy/DtypeConstructor.cs new file mode 100644 index 000000000..f84f408e1 --- /dev/null +++ b/src/TensorFlowNET.Core/NumPy/DtypeConstructor.cs @@ -0,0 +1,40 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Text; +using Razorvine.Pickle; + +namespace Tensorflow.NumPy +{ + /// + /// + /// + [SuppressMessage("ReSharper", "InconsistentNaming")] + [SuppressMessage("ReSharper", "MemberCanBePrivate.Global")] + [SuppressMessage("ReSharper", "MemberCanBeMadeStatic.Global")] + class DtypeConstructor : IObjectConstructor + { + public object construct(object[] args) + { + Console.WriteLine("DtypeConstructor"); + Console.WriteLine(args.Length); + for (int i = 0; i < args.Length; i++) + { + Console.WriteLine(args[i]); + } + return new demo(); + } + } + class demo + { + public void __setstate__(object[] args) + { + Console.WriteLine("demo __setstate__"); + Console.WriteLine(args.Length); + for (int i = 0; i < args.Length; i++) + { + Console.WriteLine(args[i]); + } + } + } +} diff --git a/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.Creation.cs b/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.Creation.cs index f29879b0f..80b62198a 100644 --- a/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.Creation.cs +++ b/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.Creation.cs @@ -4,6 +4,7 @@ using System.Linq; using System.Text; using Tensorflow.Util; +using Razorvine.Pickle; using static Tensorflow.Binding; namespace Tensorflow.NumPy @@ -93,10 +94,25 @@ Array ReadValueMatrix(BinaryReader reader, Array matrix, int bytes, Type type, i var buffer = reader.ReadBytes(bytes * total); 
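// Editorial note on ReadValueMatrix: `bytes` is the per-element size parsed from the
// .npy header and `total` is the element count derived from `shape`, so the single
// ReadBytes above pulls the entire fixed-size payload at once; the BlockCopy below
// then blits those raw bytes straight into the pre-allocated typed Array.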
System.Buffer.BlockCopy(buffer, 0, matrix, 0, buffer.Length); - return matrix; } + NDArray ReadObjectMatrix(BinaryReader reader, Array matrix, int[] shape) + { + //int data = reader.ReadByte(); + //Console.WriteLine(data); + //Console.WriteLine(reader.ReadByte()); + Stream stream = reader.BaseStream; + Unpickler.registerConstructor("numpy.core.multiarray", "_reconstruct", new MultiArrayConstructor()); + Unpickler.registerConstructor("numpy", "dtype", new DtypeConstructor()); + + var unpickler = new Unpickler(); + + NDArray result = (NDArray) unpickler.load(stream); + Console.WriteLine(result.dims); + return result; + } + public (NDArray, NDArray) meshgrid(T[] array, bool copy = true, bool sparse = false) { var tensors = array_ops.meshgrid(array, copy: copy, sparse: sparse); diff --git a/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.load.cs b/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.load.cs index 05f53d5e7..789f119a1 100644 --- a/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.load.cs +++ b/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.load.cs @@ -27,9 +27,20 @@ public Array LoadMatrix(Stream stream) Array matrix = Array.CreateInstance(type, shape); //if (type == typeof(String)) - //return ReadStringMatrix(reader, matrix, bytes, type, shape); + //return ReadStringMatrix(reader, matrix, bytes, type, shape); + NDArray res = ReadObjectMatrix(reader, matrix, shape); + Console.WriteLine("LoadMatrix"); + Console.WriteLine(res.dims[0]); + Console.WriteLine((int)res[0][0]); + Console.WriteLine(res.dims[1]); + //if (type == typeof(Object)) + //{ + + //} + //else return ReadValueMatrix(reader, matrix, bytes, type, shape); } + } public T Load(Stream stream) @@ -37,7 +48,7 @@ public T Load(Stream stream) ICloneable, IList, ICollection, IEnumerable, IStructuralComparable, IStructuralEquatable { // if (typeof(T).IsArray && (typeof(T).GetElementType().IsArray || typeof(T).GetElementType() == typeof(string))) - // return LoadJagged(stream) as T; + // return LoadJagged(stream) as T; return LoadMatrix(stream) as T; } @@ -48,7 +59,7 @@ bool ParseReader(BinaryReader reader, out int bytes, out Type t, out int[] shape shape = null; // The first 6 bytes are a magic string: exactly "x93NUMPY" - if (reader.ReadChar() != 63) return false; + if (reader.ReadByte() != 0x93) return false; if (reader.ReadChar() != 'N') return false; if (reader.ReadChar() != 'U') return false; if (reader.ReadChar() != 'M') return false; @@ -64,6 +75,7 @@ bool ParseReader(BinaryReader reader, out int bytes, out Type t, out int[] shape ushort len = reader.ReadUInt16(); string header = new String(reader.ReadChars(len)); + Console.WriteLine(header); string mark = "'descr': '"; int s = header.IndexOf(mark) + mark.Length; int e = header.IndexOf("'", s + 1); @@ -93,7 +105,7 @@ bool ParseReader(BinaryReader reader, out int bytes, out Type t, out int[] shape Type GetType(string dtype, out int bytes, out bool? isLittleEndian) { isLittleEndian = IsLittleEndian(dtype); - bytes = Int32.Parse(dtype.Substring(2)); + bytes = dtype.Length > 2 ? Int32.Parse(dtype.Substring(2)) : 0; string typeCode = dtype.Substring(1); @@ -121,6 +133,8 @@ Type GetType(string dtype, out int bytes, out bool? 
isLittleEndian) return typeof(Double); if (typeCode.StartsWith("S")) return typeof(String); + if (typeCode == "O") + return typeof(Object); throw new NotSupportedException(); } diff --git a/src/TensorFlowNET.Core/NumPy/MultiArrayConstructor.cs b/src/TensorFlowNET.Core/NumPy/MultiArrayConstructor.cs new file mode 100644 index 000000000..92927cd5a --- /dev/null +++ b/src/TensorFlowNET.Core/NumPy/MultiArrayConstructor.cs @@ -0,0 +1,44 @@ +using System; +using System.Collections.Generic; +using System.Diagnostics.CodeAnalysis; +using System.Text; +using Razorvine.Pickle; + +namespace Tensorflow.NumPy +{ + /// + /// Creates multiarrays of objects. Returns a primitive type multiarray such as int[][] if + /// the objects are ints, etc. + /// + [SuppressMessage("ReSharper", "InconsistentNaming")] + [SuppressMessage("ReSharper", "MemberCanBePrivate.Global")] + [SuppressMessage("ReSharper", "MemberCanBeMadeStatic.Global")] + public class MultiArrayConstructor : IObjectConstructor + { + public object construct(object[] args) + { + //Console.WriteLine(args.Length); + //for (int i = 0; i < args.Length; i++) + //{ + // Console.WriteLine(args[i]); + //} + Console.WriteLine("MultiArrayConstructor"); + + var arg1 = (Object[])args[1]; + var dims = new int[arg1.Length]; + for (var i = 0; i < arg1.Length; i++) + { + dims[i] = (int)arg1[i]; + } + + var dtype = TF_DataType.DtInvalid; + switch (args[2]) + { + case "b": dtype = TF_DataType.DtUint8Ref; break; + default: throw new NotImplementedException("cannot parse" + args[2]); + } + return new NDArray(new Shape(dims), dtype); + + } + } +} diff --git a/src/TensorFlowNET.Core/NumPy/NDArray.Pickle.cs b/src/TensorFlowNET.Core/NumPy/NDArray.Pickle.cs new file mode 100644 index 000000000..b4d66243a --- /dev/null +++ b/src/TensorFlowNET.Core/NumPy/NDArray.Pickle.cs @@ -0,0 +1,19 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.NumPy +{ + public partial class NDArray + { + public void __setstate__(object[] args) + { + Console.WriteLine("NDArray __setstate__"); + Console.WriteLine(args.Length); + for (int i = 0; i < args.Length; i++) + { + Console.WriteLine(args[i]); + } + } + } +} diff --git a/src/TensorFlowNET.Core/Tensorflow.Binding.csproj b/src/TensorFlowNET.Core/Tensorflow.Binding.csproj index 09f5b0770..38778c3fe 100644 --- a/src/TensorFlowNET.Core/Tensorflow.Binding.csproj +++ b/src/TensorFlowNET.Core/Tensorflow.Binding.csproj @@ -112,6 +112,7 @@ https://tensorflownet.readthedocs.io + diff --git a/src/TensorFlowNET.Keras/Datasets/Imdb.cs b/src/TensorFlowNET.Keras/Datasets/Imdb.cs index 56b0d2a77..016b352d9 100644 --- a/src/TensorFlowNET.Keras/Datasets/Imdb.cs +++ b/src/TensorFlowNET.Keras/Datasets/Imdb.cs @@ -5,6 +5,13 @@ using Tensorflow.Keras.Utils; using Tensorflow.NumPy; using System.Linq; +using Google.Protobuf.Collections; +using Microsoft.VisualBasic; +using OneOf.Types; +using static HDF.PInvoke.H5; +using System.Data; +using System.Reflection.Emit; +using System.Xml.Linq; namespace Tensorflow.Keras.Datasets { @@ -12,13 +19,59 @@ namespace Tensorflow.Keras.Datasets /// This is a dataset of 25,000 movies reviews from IMDB, labeled by sentiment /// (positive/negative). Reviews have been preprocessed, and each review is /// encoded as a list of word indexes(integers). 
+ /// For convenience, words are indexed by overall frequency in the dataset, + /// so that for instance the integer "3" encodes the 3rd most frequent word in + /// the data.This allows for quick filtering operations such as: + /// "only consider the top 10,000 most + /// common words, but eliminate the top 20 most common words". + /// As a convention, "0" does not stand for a specific word, but instead is used + /// to encode the pad token. + /// Args: + /// path: where to cache the data (relative to %TEMP%/imdb/imdb.npz). + /// num_words: integer or None.Words are + /// ranked by how often they occur(in the training set) and only + /// the `num_words` most frequent words are kept.Any less frequent word + /// will appear as `oov_char` value in the sequence data.If None, + /// all words are kept.Defaults to `None`. + /// skip_top: skip the top N most frequently occurring words + /// (which may not be informative). These words will appear as + /// `oov_char` value in the dataset.When 0, no words are + /// skipped. Defaults to `0`. + /// maxlen: int or None.Maximum sequence length. + /// Any longer sequence will be truncated. None, means no truncation. + /// Defaults to `None`. + /// seed: int. Seed for reproducible data shuffling. + /// start_char: int. The start of a sequence will be marked with this + /// character. 0 is usually the padding character. Defaults to `1`. + /// oov_char: int. The out-of-vocabulary character. + /// Words that were cut out because of the `num_words` or + /// `skip_top` limits will be replaced with this character. + /// index_from: int. Index actual words with this index and higher. + /// Returns: + /// Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. + /// + /// ** x_train, x_test**: lists of sequences, which are lists of indexes + /// (integers). If the num_words argument was specific, the maximum + /// possible index value is `num_words - 1`. If the `maxlen` argument was + /// specified, the largest possible sequence length is `maxlen`. + /// + /// ** y_train, y_test**: lists of integer labels(1 or 0). + /// + /// Raises: + /// ValueError: in case `maxlen` is so low + /// that no input sequence could be kept. + /// Note that the 'out of vocabulary' character is only used for + /// words that were present in the training set but are not included + /// because they're not making the `num_words` cut here. + /// Words that were not seen in the training set but are in the test set + /// have simply been skipped. /// + /// """Loads the [IMDB dataset](https://ai.stanford.edu/~amaas/data/sentiment/). public class Imdb { string origin_folder = "https://storage.googleapis.com/tensorflow/tf-keras-datasets/"; string file_name = "imdb.npz"; string dest_folder = "imdb"; - /// /// Loads the [IMDB dataset](https://ai.stanford.edu/~amaas/data/sentiment/). 
/// @@ -41,8 +94,10 @@ public DatasetPass load_data(string path = "imdb.npz", int index_from = 3) { var dst = Download(); - - var lines = File.ReadAllLines(Path.Combine(dst, "imdb_train.txt")); + var fileBytes = File.ReadAllBytes(Path.Combine(dst, file_name)); + var (x_train, x_test) = LoadX(fileBytes); + var (y_train, y_test) = LoadY(fileBytes); + /*var lines = File.ReadAllLines(Path.Combine(dst, "imdb_train.txt")); var x_train_string = new string[lines.Length]; var y_train = np.zeros(new int[] { lines.Length }, np.int64); for (int i = 0; i < lines.Length; i++) @@ -62,7 +117,7 @@ public DatasetPass load_data(string path = "imdb.npz", x_test_string[i] = lines[i].Substring(2); } - var x_test = np.array(x_test_string); + var x_test = np.array(x_test_string);*/ return new DatasetPass { diff --git a/test/TensorFlowNET.UnitTest/Dataset/DatasetTest.cs b/test/TensorFlowNET.UnitTest/Dataset/DatasetTest.cs index 8317346ea..778290bb8 100644 --- a/test/TensorFlowNET.UnitTest/Dataset/DatasetTest.cs +++ b/test/TensorFlowNET.UnitTest/Dataset/DatasetTest.cs @@ -1,7 +1,9 @@ using Microsoft.VisualStudio.TestTools.UnitTesting; using System; +using System.Collections.Generic; using System.Linq; using static Tensorflow.Binding; +using static Tensorflow.KerasApi; namespace TensorFlowNET.UnitTest.Dataset { @@ -195,5 +197,20 @@ public void Shuffle() Assert.IsFalse(allEqual); } + [TestMethod] + public void GetData() + { + var vocab_size = 20000; // Only consider the top 20k words + var maxlen = 200; // Only consider the first 200 words of each movie review + var dataset = keras.datasets.imdb.load_data(num_words: vocab_size); + var x_train = dataset.Train.Item1; + var y_train = dataset.Train.Item2; + var x_val = dataset.Test.Item1; + var y_val = dataset.Test.Item2; + print(len(x_train) + "Training sequences"); + print(len(x_val) + "Validation sequences"); + x_train = keras.preprocessing.sequence.pad_sequences((IEnumerable)x_train, maxlen: maxlen); + x_val = keras.preprocessing.sequence.pad_sequences((IEnumerable)x_val, maxlen: maxlen); + } } } From f5eb4ff0a0950fa1b0c3af9b67950e4f4dc90a1a Mon Sep 17 00:00:00 2001 From: Wanglongzhi2001 <583087864@qq.com> Date: Sat, 26 Aug 2023 10:35:45 +0800 Subject: [PATCH 03/77] fix: partially fix the bug of load_model --- .../ArgsDefinition/Activation/ExponentialArgs.cs | 10 ++++++++++ .../ArgsDefinition/Activation/HardSigmoidArgs.cs | 10 ++++++++++ .../Keras/ArgsDefinition/Activation/SELUArgs.cs | 11 +++++++++++ .../Keras/ArgsDefinition/Activation/SoftplusArgs.cs | 10 ++++++++++ .../Keras/ArgsDefinition/Activation/SoftsignArgs.cs | 10 ++++++++++ .../Keras/ArgsDefinition/Activation/SwishArgs.cs | 10 ++++++++++ .../Keras/ArgsDefinition/Activation/TanhArgs.cs | 10 ++++++++++ .../ArgsDefinition/Convolution/Conv2DTransposeArgs.cs | 10 ++++++++++ .../Keras/ArgsDefinition/Merging/AddArgs.cs | 10 ++++++++++ .../Keras/ArgsDefinition/Merging/ConcatenateArgs.cs | 10 ++++++++++ .../Keras/ArgsDefinition/Merging/SubtractArgs.cs | 10 ++++++++++ .../Pooling/GlobalAveragePooling1DArgs.cs | 10 ++++++++++ .../Pooling/GlobalAveragePooling2DArgs.cs | 10 ++++++++++ .../ArgsDefinition/Pooling/GlobalMaxPooling1DArgs.cs | 10 ++++++++++ .../ArgsDefinition/Pooling/GlobalMaxPooling2DArgs.cs | 10 ++++++++++ .../Keras/ArgsDefinition/Pooling/MaxPooling1DArgs.cs | 10 ++++++++++ 16 files changed, 161 insertions(+) create mode 100644 src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/ExponentialArgs.cs create mode 100644 src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/HardSigmoidArgs.cs create mode 
100644 src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SELUArgs.cs create mode 100644 src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SoftplusArgs.cs create mode 100644 src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SoftsignArgs.cs create mode 100644 src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SwishArgs.cs create mode 100644 src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/TanhArgs.cs create mode 100644 src/TensorFlowNET.Core/Keras/ArgsDefinition/Convolution/Conv2DTransposeArgs.cs create mode 100644 src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/AddArgs.cs create mode 100644 src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/ConcatenateArgs.cs create mode 100644 src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/SubtractArgs.cs create mode 100644 src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalAveragePooling1DArgs.cs create mode 100644 src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalAveragePooling2DArgs.cs create mode 100644 src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalMaxPooling1DArgs.cs create mode 100644 src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalMaxPooling2DArgs.cs create mode 100644 src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/MaxPooling1DArgs.cs diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/ExponentialArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/ExponentialArgs.cs new file mode 100644 index 000000000..ef024971d --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/ExponentialArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class ExponentialArgs : LayerArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/HardSigmoidArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/HardSigmoidArgs.cs new file mode 100644 index 000000000..788e0f36d --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/HardSigmoidArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class HardSigmoidArgs : LayerArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SELUArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SELUArgs.cs new file mode 100644 index 000000000..eb0e18446 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SELUArgs.cs @@ -0,0 +1,11 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class SELUArgs : LayerArgs + { + + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SoftplusArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SoftplusArgs.cs new file mode 100644 index 000000000..7b4f20795 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SoftplusArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class SoftplusArgs : LayerArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SoftsignArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SoftsignArgs.cs new file mode 100644 index 000000000..4e23d261d --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SoftsignArgs.cs @@ -0,0 +1,10 @@ +using System; +using 
System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class SoftsignArgs : LayerArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SwishArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SwishArgs.cs new file mode 100644 index 000000000..3dea06a23 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/SwishArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class SwishArgs : LayerArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/TanhArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/TanhArgs.cs new file mode 100644 index 000000000..5df41b71b --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Activation/TanhArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class TanhArgs : LayerArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Convolution/Conv2DTransposeArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Convolution/Conv2DTransposeArgs.cs new file mode 100644 index 000000000..3daba9465 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Convolution/Conv2DTransposeArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class Conv2DTransposeArgs : Conv2DArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/AddArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/AddArgs.cs new file mode 100644 index 000000000..016d58203 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/AddArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class AddArgs : MergeArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/ConcatenateArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/ConcatenateArgs.cs new file mode 100644 index 000000000..4a81d139d --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/ConcatenateArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class ConcatenateArgs : MergeArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/SubtractArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/SubtractArgs.cs new file mode 100644 index 000000000..1e3621cb6 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/SubtractArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class SubtractArgs : MergeArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalAveragePooling1DArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalAveragePooling1DArgs.cs new file mode 100644 index 000000000..e73aff766 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalAveragePooling1DArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class GlobalAveragePooling1DArgs : Pooling1DArgs + { + } +} diff --git 
a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalAveragePooling2DArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalAveragePooling2DArgs.cs new file mode 100644 index 000000000..d143cf471 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalAveragePooling2DArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class GlobalAveragePooling2DArgs : Pooling2DArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalMaxPooling1DArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalMaxPooling1DArgs.cs new file mode 100644 index 000000000..e03227feb --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalMaxPooling1DArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class GlobalMaxPooling1DArgs : Pooling1DArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalMaxPooling2DArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalMaxPooling2DArgs.cs new file mode 100644 index 000000000..a95cac836 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/GlobalMaxPooling2DArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class GlobalMaxPooling2DArgs : Pooling2DArgs + { + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/MaxPooling1DArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/MaxPooling1DArgs.cs new file mode 100644 index 000000000..4cfff2c15 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Pooling/MaxPooling1DArgs.cs @@ -0,0 +1,10 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class MaxPooling1DArgs : Pooling1DArgs + { + } +} From f679af67e61c51bee1aca254f993d6d137df07ff Mon Sep 17 00:00:00 2001 From: Wanglongzhi2001 <583087864@qq.com> Date: Sat, 26 Aug 2023 11:36:41 +0800 Subject: [PATCH 04/77] fix: partially fix the bug of load_model --- .../Layers/LayersApi.Activation.cs | 14 +++++++------- .../Layers/LayersApi.Merging.cs | 2 +- src/TensorFlowNET.Keras/Layers/LayersApi.cs | 18 +++++++++--------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/src/TensorFlowNET.Keras/Layers/LayersApi.Activation.cs b/src/TensorFlowNET.Keras/Layers/LayersApi.Activation.cs index 280e91e2c..2c55f8fd5 100644 --- a/src/TensorFlowNET.Keras/Layers/LayersApi.Activation.cs +++ b/src/TensorFlowNET.Keras/Layers/LayersApi.Activation.cs @@ -10,14 +10,14 @@ public partial class LayersApi { public ILayer ELU ( float alpha = 0.1f ) => new ELU(new ELUArgs { Alpha = alpha }); public ILayer SELU () - => new SELU(new LayerArgs { }); + => new SELU(new SELUArgs { }); public ILayer Softmax(int axis = -1) => new Softmax(new SoftmaxArgs { axis = axis }); public ILayer Softmax ( Axis axis ) => new Softmax(new SoftmaxArgs { axis = axis }); - public ILayer Softplus () => new Softplus(new LayerArgs { }); - public ILayer HardSigmoid () => new HardSigmoid(new LayerArgs { }); - public ILayer Softsign () => new Softsign(new LayerArgs { }); - public ILayer Swish () => new Swish(new LayerArgs { }); - public ILayer Tanh () => new Tanh(new LayerArgs { }); - public ILayer Exponential () => new Exponential(new LayerArgs { }); + public 
ILayer Softplus () => new Softplus(new SoftplusArgs { }); + public ILayer HardSigmoid () => new HardSigmoid(new HardSigmoidArgs { }); + public ILayer Softsign () => new Softsign(new SoftsignArgs { }); + public ILayer Swish () => new Swish(new SwishArgs { }); + public ILayer Tanh () => new Tanh(new TanhArgs { }); + public ILayer Exponential () => new Exponential(new ExponentialArgs { }); } } diff --git a/src/TensorFlowNET.Keras/Layers/LayersApi.Merging.cs b/src/TensorFlowNET.Keras/Layers/LayersApi.Merging.cs index d94bfb4d8..bf06b1418 100644 --- a/src/TensorFlowNET.Keras/Layers/LayersApi.Merging.cs +++ b/src/TensorFlowNET.Keras/Layers/LayersApi.Merging.cs @@ -14,7 +14,7 @@ public partial class LayersApi /// Axis along which to concatenate. /// public ILayer Concatenate(int axis = -1) - => new Concatenate(new MergeArgs + => new Concatenate(new ConcatenateArgs { Axis = axis }); diff --git a/src/TensorFlowNET.Keras/Layers/LayersApi.cs b/src/TensorFlowNET.Keras/Layers/LayersApi.cs index a04a9c051..9155c7742 100644 --- a/src/TensorFlowNET.Keras/Layers/LayersApi.cs +++ b/src/TensorFlowNET.Keras/Layers/LayersApi.cs @@ -240,7 +240,7 @@ public ILayer Conv2DTranspose(int filters, string kernel_regularizer = null, string bias_regularizer = null, string activity_regularizer = null) - => new Conv2DTranspose(new Conv2DArgs + => new Conv2DTranspose(new Conv2DTransposeArgs { Rank = 2, Filters = filters, @@ -568,7 +568,7 @@ public ILayer MaxPooling1D(int? pool_size = null, int? strides = null, string padding = "valid", string data_format = null) - => new MaxPooling1D(new Pooling1DArgs + => new MaxPooling1D(new MaxPooling1DArgs { PoolSize = pool_size ?? 2, Strides = strides ?? (pool_size ?? 2), @@ -944,21 +944,21 @@ public ILayer Rescaling(float scale, /// /// public ILayer Add() - => new Add(new MergeArgs { }); + => new Add(new AddArgs { }); /// /// /// /// public ILayer Subtract() - => new Subtract(new MergeArgs { }); + => new Subtract(new SubtractArgs { }); /// /// Global max pooling operation for spatial data. /// /// public ILayer GlobalAveragePooling2D() - => new GlobalAveragePooling2D(new Pooling2DArgs { }); + => new GlobalAveragePooling2D(new GlobalAveragePooling2DArgs { }); /// /// Global average pooling operation for temporal data. @@ -968,7 +968,7 @@ public ILayer GlobalAveragePooling2D() /// /// public ILayer GlobalAveragePooling1D(string data_format = "channels_last") - => new GlobalAveragePooling1D(new Pooling1DArgs { DataFormat = data_format }); + => new GlobalAveragePooling1D(new GlobalAveragePooling1DArgs { DataFormat = data_format }); /// /// Global max pooling operation for spatial data. @@ -977,7 +977,7 @@ public ILayer GlobalAveragePooling1D(string data_format = "channels_last") /// channels_last corresponds to inputs with shape (batch, height, width, channels) while channels_first corresponds to inputs with shape (batch, channels, height, width). /// public ILayer GlobalAveragePooling2D(string data_format = "channels_last") - => new GlobalAveragePooling2D(new Pooling2DArgs { DataFormat = data_format }); + => new GlobalAveragePooling2D(new GlobalAveragePooling2DArgs { DataFormat = data_format }); /// /// Global max pooling operation for 1D temporal data. 
@@ -988,7 +988,7 @@ public ILayer GlobalAveragePooling2D(string data_format = "channels_last") /// /// public ILayer GlobalMaxPooling1D(string data_format = "channels_last") - => new GlobalMaxPooling1D(new Pooling1DArgs { DataFormat = data_format }); + => new GlobalMaxPooling1D(new GlobalMaxPooling1DArgs { DataFormat = data_format }); /// /// Global max pooling operation for spatial data. @@ -997,7 +997,7 @@ public ILayer GlobalMaxPooling1D(string data_format = "channels_last") /// channels_last corresponds to inputs with shape (batch, height, width, channels) while channels_first corresponds to inputs with shape (batch, channels, height, width). /// public ILayer GlobalMaxPooling2D(string data_format = "channels_last") - => new GlobalMaxPooling2D(new Pooling2DArgs { DataFormat = data_format }); + => new GlobalMaxPooling2D(new GlobalMaxPooling2DArgs { DataFormat = data_format }); /// /// Get an weights initializer from its name. From 8e3ba22c832e6d34598644686e00182924b08c3a Mon Sep 17 00:00:00 2001 From: lingbai-kong Date: Sat, 26 Aug 2023 16:29:28 +0800 Subject: [PATCH 05/77] fix: validate dataset of `Imdb` do not load bug & add: custom `Imdb` path --- src/TensorFlowNET.Keras/Datasets/Imdb.cs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/TensorFlowNET.Keras/Datasets/Imdb.cs b/src/TensorFlowNET.Keras/Datasets/Imdb.cs index 61ce39475..a62f3f87d 100644 --- a/src/TensorFlowNET.Keras/Datasets/Imdb.cs +++ b/src/TensorFlowNET.Keras/Datasets/Imdb.cs @@ -31,7 +31,7 @@ public class Imdb /// /// /// - public DatasetPass load_data(string path = "imdb.npz", + public DatasetPass load_data(string? path = "imdb.npz", int num_words = -1, int skip_top = 0, int maxlen = -1, @@ -42,7 +42,7 @@ public DatasetPass load_data(string path = "imdb.npz", { if (maxlen == -1) throw new InvalidArgumentError("maxlen must be assigned."); - var dst = Download(); + var dst = path ?? Download(); var lines = File.ReadAllLines(Path.Combine(dst, "imdb_train.txt")); var x_train_string = new string[lines.Length]; @@ -55,7 +55,7 @@ public DatasetPass load_data(string path = "imdb.npz", var x_train = keras.preprocessing.sequence.pad_sequences(PraseData(x_train_string), maxlen: maxlen); - File.ReadAllLines(Path.Combine(dst, "imdb_test.txt")); + lines = File.ReadAllLines(Path.Combine(dst, "imdb_test.txt")); var x_test_string = new string[lines.Length]; var y_test = np.zeros(new int[] { lines.Length }, np.int64); for (int i = 0; i < lines.Length; i++) From ba1ddb44488bbb2f528065ac2be07e9e6965722e Mon Sep 17 00:00:00 2001 From: Haiping Chen Date: Sat, 26 Aug 2023 11:20:12 -0500 Subject: [PATCH 06/77] Set SGD default value. 
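SGD previously had to be constructed with explicit arguments; this patch aligns its
defaults with Keras (learning_rate = 0.01, momentum = 0). A minimal usage sketch,
assuming the usual `keras.optimizers` accessor for the IOptimizerApi changed below:

    // Equivalent after this patch:
    var sgd = keras.optimizers.SGD();
    var sgdExplicit = keras.optimizers.SGD(learning_rate: 0.01f, momentum: 0f);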
--- src/TensorFlowNET.Core/Keras/IOptimizerApi.cs | 2 +- .../Tensorflow.Binding.csproj | 10 ++--- .../Optimizers/OptimizerApi.cs | 2 +- .../Tensorflow.Keras.csproj | 39 ++++++++++--------- 4 files changed, 28 insertions(+), 25 deletions(-) diff --git a/src/TensorFlowNET.Core/Keras/IOptimizerApi.cs b/src/TensorFlowNET.Core/Keras/IOptimizerApi.cs index 19e3a7b8c..6c15fd469 100644 --- a/src/TensorFlowNET.Core/Keras/IOptimizerApi.cs +++ b/src/TensorFlowNET.Core/Keras/IOptimizerApi.cs @@ -63,6 +63,6 @@ IOptimizer RMSprop(float learning_rate = 0.001f, bool centered = false, string name = "RMSprop"); - IOptimizer SGD(float learning_rate, float momentum); + IOptimizer SGD(float learning_rate = 0.01f, float momentum = 0f); } } diff --git a/src/TensorFlowNET.Core/Tensorflow.Binding.csproj b/src/TensorFlowNET.Core/Tensorflow.Binding.csproj index ca5aa47a9..babb52561 100644 --- a/src/TensorFlowNET.Core/Tensorflow.Binding.csproj +++ b/src/TensorFlowNET.Core/Tensorflow.Binding.csproj @@ -5,13 +5,13 @@ Tensorflow.Binding Tensorflow 2.11.0 - 0.110.2 + 0.110.3 10.0 enable Haiping Chen, Eli Belash, Yaohui Liu, Meinrad Recheis SciSharp STACK False - Apache 2.0, Haiping Chen $([System.DateTime]::UtcNow.ToString(yyyy)) + Apache 2.0, Haiping Chen since 2018 https://github.com/SciSharp/TensorFlow.NET git http://scisharpstack.org @@ -20,7 +20,7 @@ Google's TensorFlow full binding in .NET Standard. Building, training and infering deep learning models. https://tensorflownet.readthedocs.io - 0.110.1.0 + 0.110.3.0 tf.net 0.110.x and above are based on tensorflow native 2.11.0 * Support RNN, LSTM model. @@ -43,7 +43,7 @@ https://tensorflownet.readthedocs.io tf.net 0.10x.x aligns with TensorFlow v2.10.x native library. tf.net 0.11x.x aligns with TensorFlow v2.11.x native library. - 0.110.2.0 + 0.110.3.0 LICENSE true packages @@ -172,7 +172,7 @@ https://tensorflownet.readthedocs.io - + diff --git a/src/TensorFlowNET.Keras/Optimizers/OptimizerApi.cs b/src/TensorFlowNET.Keras/Optimizers/OptimizerApi.cs index affd43a4f..a237499f9 100644 --- a/src/TensorFlowNET.Keras/Optimizers/OptimizerApi.cs +++ b/src/TensorFlowNET.Keras/Optimizers/OptimizerApi.cs @@ -71,7 +71,7 @@ public IOptimizer RMSprop(float learning_rate = 0.001f, Name = name }); - public IOptimizer SGD(float learning_rate, float momentum) + public IOptimizer SGD(float learning_rate = 0.01f, float momentum = 0f) => new SGD(learning_rate, momentum); } } diff --git a/src/TensorFlowNET.Keras/Tensorflow.Keras.csproj b/src/TensorFlowNET.Keras/Tensorflow.Keras.csproj index eeb7c559f..36d1bc1d4 100644 --- a/src/TensorFlowNET.Keras/Tensorflow.Keras.csproj +++ b/src/TensorFlowNET.Keras/Tensorflow.Keras.csproj @@ -7,27 +7,30 @@ enable Tensorflow.Keras AnyCPU;x64 - 0.11.2 + 0.11.3 Haiping Chen Keras for .NET - Apache 2.0, Haiping Chen 2023 + Apache 2.0, Haiping Chen since 2018 TensorFlow.Keras https://github.com/SciSharp/TensorFlow.NET https://avatars3.githubusercontent.com/u/44989469?s=200&v=4 https://github.com/SciSharp/TensorFlow.NET - Keras for .NET is a C# version of Keras ported from the python version. - -* Support CIFAR-10 dataset in keras.datasets. -* Support Conv2D functional API. -* Support BatchNormalization layer. -* Building keras model in subclass, functional and sequential api -* Implemented backward_function. -* Support model.load_weights. -* Add Subtract layer -* Text preprocessing -* Preprocessing.timeseries_dataset_from_array -* Fixed memory leak for YOLOv3 model. 
-* Support RNN and LSTM models + + Keras for .NET is a C# version of Keras ported from the python version. + + * Support CIFAR-10 dataset in keras.datasets. + * Support Conv2D functional API. + * Support BatchNormalization layer. + * Building keras model in subclass, functional and sequential api + * Implemented backward_function. + * Support model.load_weights. + * Add Subtract layer + * Text preprocessing + * Preprocessing.timeseries_dataset_from_array + * Fixed memory leak for YOLOv3 model. + * Support RNN and LSTM models + * Support Transformer model + Keras for .NET Keras is an API designed for human beings, not machines. Keras follows best practices for reducing cognitive load: it offers consistent & simple APIs, it minimizes the number of user actions required for common use cases, and it provides clear & actionable error messages. @@ -39,8 +42,8 @@ Keras is an API designed for human beings, not machines. Keras follows best prac Git False Open.snk - 0.11.2.0 - 0.11.2.0 + 0.11.3.0 + 0.11.3.0 LICENSE Debug;Release;GPU @@ -140,7 +143,7 @@ Keras is an API designed for human beings, not machines. Keras follows best prac - + From 7b077eac7e6a9e60d9d34be9782e222317fbe353 Mon Sep 17 00:00:00 2001 From: Wanglongzhi2001 <583087864@qq.com> Date: Mon, 4 Sep 2023 00:05:22 +0800 Subject: [PATCH 07/77] feat: implement GRU layer --- .../Keras/ArgsDefinition/Rnn/GRUArgs.cs | 29 +++ .../ArgsDefinition/Rnn/GRUOptionalArgs.cs | 13 ++ .../Keras/Layers/ILayersApi.cs | 19 ++ src/TensorFlowNET.Keras/Layers/LayersApi.cs | 61 ++++++- src/TensorFlowNET.Keras/Layers/Rnn/GRU.cs | 168 ++++++++++++++++++ src/TensorFlowNET.Keras/Layers/Rnn/RNN.cs | 42 +---- .../Layers/Rnn.Test.cs | 9 + 7 files changed, 300 insertions(+), 41 deletions(-) create mode 100644 src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUArgs.cs create mode 100644 src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUOptionalArgs.cs create mode 100644 src/TensorFlowNET.Keras/Layers/Rnn/GRU.cs diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUArgs.cs new file mode 100644 index 000000000..cdc3097e9 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUArgs.cs @@ -0,0 +1,29 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition +{ + public class GRUArgs : AutoSerializeLayerArgs + { + public int Units { get; set; } + public Activation Activation { get; set; } + public Activation RecurrentActivation { get; set; } + public bool UseBias { get; set; } = true; + public float Dropout { get; set; } = .0f; + public float RecurrentDropout { get; set; } = .0f; + public IInitializer KernelInitializer { get; set; } + public IInitializer RecurrentInitializer { get; set; } + public IInitializer BiasInitializer { get; set; } + public bool ReturnSequences { get;set; } + public bool ReturnState { get;set; } + public bool GoBackwards { get;set; } + public bool Stateful { get;set; } + public bool Unroll { get;set; } + public bool TimeMajor { get;set; } + public bool ResetAfter { get;set; } + public int Implementation { get; set; } = 2; + + } + +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUOptionalArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUOptionalArgs.cs new file mode 100644 index 000000000..d441dc828 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUOptionalArgs.cs @@ -0,0 +1,13 @@ +using System; +using System.Collections.Generic; +using System.Text; + 
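// Editorial note: GRUOptionalArgs is the call-time companion of GRUArgs and currently
// carries only an optional mask tensor. GRU.Call (later in this patch) downcasts the
// incoming IOptionalArgs to this type before reading Mask.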
+namespace Tensorflow.Keras.ArgsDefinition +{ + public class GRUOptionalArgs + { + public string Identifier => "GRU"; + + public Tensor Mask { get; set; } = null; + } +} diff --git a/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs b/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs index b8aff5fb6..5e08eadc4 100644 --- a/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs +++ b/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs @@ -259,6 +259,25 @@ public IRnnCell GRUCell( float recurrent_dropout = 0f, bool reset_after = true); + public ILayer GRU( + int units, + string activation = "tanh", + string recurrent_activation = "sigmoid", + bool use_bias = true, + string kernel_initializer = "glorot_uniform", + string recurrent_initializer = "orthogonal", + string bias_initializer = "zeros", + float dropout = 0f, + float recurrent_dropout = 0f, + bool return_sequences = false, + bool return_state = false, + bool go_backwards = false, + bool stateful = false, + bool unroll = false, + bool time_major = false, + bool reset_after = true + ); + /// /// Bidirectional wrapper for RNNs. /// diff --git a/src/TensorFlowNET.Keras/Layers/LayersApi.cs b/src/TensorFlowNET.Keras/Layers/LayersApi.cs index 9155c7742..928e7e337 100644 --- a/src/TensorFlowNET.Keras/Layers/LayersApi.cs +++ b/src/TensorFlowNET.Keras/Layers/LayersApi.cs @@ -784,7 +784,7 @@ public IRnnCell LSTMCell(int uints, string recurrent_activation = "sigmoid", bool use_bias = true, string kernel_initializer = "glorot_uniform", - string recurrent_initializer = "orthogonal", // TODO(Wanglongzhi2001),glorot_uniform has not been developed. + string recurrent_initializer = "orthogonal", string bias_initializer = "zeros", bool unit_forget_bias = true, float dropout = 0f, @@ -908,6 +908,65 @@ public IRnnCell GRUCell( ResetAfter = reset_after }); + /// + /// Gated Recurrent Unit - Cho et al. 2014. + /// + /// Positive integer, dimensionality of the output space. + /// Activation function to use. If you pass `None`, no activation is applied.(ie. "linear" activation: `a(x) = x`). + /// Activation function to use for the recurrent step. If you pass `None`, no activation is applied. (ie. "linear" activation: `a(x) = x`). + /// Boolean, (default `True`), whether the layer uses a bias vector. + /// Initializer for the `kernel` weights matrix, used for the linear transformation of the inputs. Default: `glorot_uniform`. + /// Initializer for the `recurrent_kernel` weights matrix, used for the linear transformation of the recurrent state. Default: `orthogonal`. + /// Initializer for the bias vector. Default: `zeros`. + /// Float between 0 and 1. Fraction of the units to drop for the linear transformation of the inputs. Default: 0. + /// Float between 0 and 1. Fraction of the units to drop for the linear transformation of the recurrent state. Default: 0. + /// + /// Boolean. Whether to return the last output in the output sequence, or the full sequence. Default: `False`. + /// Boolean. Whether to return the last state in addition to the output. Default: `False`. + /// Boolean (default `False`). If True, process the input sequence backwards and return the reversed sequence. + /// Boolean (default False). If True, the last state for each sample at index i in a batch will be used as initial state for the sample of index i in the following batch. + /// Boolean (default False). If True, the network will be unrolled, else a symbolic loop will be used. Unrolling can speed-up a RNN, + /// The shape format of the `inputs` and `outputs` tensors. 
+ /// GRU convention (whether to apply reset gate after or before matrix multiplication). False = "before", True = "after" (default and cuDNN compatible). + /// + public ILayer GRU( + int units, + string activation = "tanh", + string recurrent_activation = "sigmoid", + bool use_bias = true, + string kernel_initializer = "glorot_uniform", + string recurrent_initializer = "orthogonal", + string bias_initializer = "zeros", + float dropout = 0f, + float recurrent_dropout = 0f, + bool return_sequences = false, + bool return_state = false, + bool go_backwards = false, + bool stateful = false, + bool unroll = false, + bool time_major = false, + bool reset_after = true + ) + => new GRU(new GRUArgs + { + Units = units, + Activation = keras.activations.GetActivationFromName(activation), + RecurrentActivation = keras.activations.GetActivationFromName(recurrent_activation), + KernelInitializer = GetInitializerByName(kernel_initializer), + RecurrentInitializer = GetInitializerByName(recurrent_initializer), + BiasInitializer = GetInitializerByName(bias_initializer), + UseBias = use_bias, + Dropout = dropout, + RecurrentDropout = recurrent_dropout, + ReturnSequences = return_sequences, + ReturnState = return_state, + GoBackwards = go_backwards, + Stateful = stateful, + TimeMajor = time_major, + Unroll = unroll, + ResetAfter = reset_after + }); + public ILayer Bidirectional( ILayer layer, string merge_mode = "concat", diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/GRU.cs b/src/TensorFlowNET.Keras/Layers/Rnn/GRU.cs new file mode 100644 index 000000000..0919883d2 --- /dev/null +++ b/src/TensorFlowNET.Keras/Layers/Rnn/GRU.cs @@ -0,0 +1,168 @@ +using System; +using System.Collections.Generic; +using System.Text; +using Tensorflow.Keras.ArgsDefinition; +using Tensorflow.Common.Extensions; +using Tensorflow.Common.Types; +using Tensorflow.Keras.Saving; + + +namespace Tensorflow.Keras.Layers +{ + public class GRU : RNN + { + GRUArgs _args; + private static GRUCell _cell; + + bool _return_runtime; + public GRUCell Cell { get => _cell; } + public int units { get => _args.Units; } + public Activation activation { get => _args.Activation; } + public Activation recurrent_activation { get => _args.RecurrentActivation; } + public bool use_bias { get => _args.UseBias; } + public float dropout { get => _args.Dropout; } + public float recurrent_dropout { get => _args.RecurrentDropout; } + public IInitializer kernel_initializer { get => _args.KernelInitializer; } + public IInitializer recurrent_initializer { get => _args.RecurrentInitializer; } + public IInitializer bias_initializer { get => _args.BiasInitializer; } + public int implementation { get => _args.Implementation; } + public bool reset_after { get => _args.ResetAfter; } + + public GRU(GRUArgs args) : base(CreateCell(args), PreConstruct(args)) + { + _args = args; + + if (_args.Implementation == 0) + { + // Use the red output to act as a warning message that can also be used under the release version + Console.ForegroundColor = ConsoleColor.Red; + Console.WriteLine("Warning: `implementation=0` has been deprecated, "+ + "and now defaults to `implementation=2`."+ + "Please update your layer call."); + Console.ResetColor(); + } + + GRUCell cell = new GRUCell(new GRUCellArgs + { + Units = _args.Units, + Activation = _args.Activation, + RecurrentActivation = _args.RecurrentActivation, + UseBias = _args.UseBias, + Dropout = _args.Dropout, + RecurrentDropout = _args.RecurrentDropout, + KernelInitializer = _args.KernelInitializer, + RecurrentInitializer = 
_args.RecurrentInitializer, + BiasInitializer = _args.BiasInitializer, + ResetAfter = _args.ResetAfter, + Implementation = _args.Implementation + }); + _cell = cell; + } + + protected override Tensors Call(Tensors inputs, Tensors initial_state = null, bool? training = null, IOptionalArgs? optional_args = null) + { + GRUOptionalArgs? gru_optional_args = optional_args as GRUOptionalArgs; + if (optional_args is not null && gru_optional_args is null) + { + throw new ArgumentException("The type of optional args should be `GRUOptionalArgs`."); + } + Tensors? mask = gru_optional_args?.Mask; + + // Not support ragger input temporarily; + int row_length = 0; + bool is_ragged_input = false; + + _validate_args_if_ragged(is_ragged_input, mask); + + // GRU does not support constants.Ignore it during process. + (inputs, initial_state, _) = this._process_inputs(inputs, initial_state, null); + + if (mask.Length > 1) + { + mask = mask[0]; + } + + var input_shape = inputs.shape; + var timesteps = _args.TimeMajor ? input_shape[0] : input_shape[1]; + + + // TODO(Wanglongzhi2001), finish _could_use_gpu_kernel part + Func step = (cell_inputs, cell_states) => + { + var res = Cell.Apply(cell_inputs, cell_states, training is null ? true : training.Value); + var (output, state) = res; + return (output, state); + }; + + var (last_output, outputs, states) = keras.backend.rnn( + step, + inputs, + initial_state, + constants: null, + go_backwards: _args.GoBackwards, + mask: mask, + unroll: _args.Unroll, + input_length: ops.convert_to_tensor(timesteps), + time_major: _args.TimeMajor, + zero_output_for_mask: base.Args.ZeroOutputForMask, + return_all_outputs: _args.ReturnSequences + ); + + Tensors output; + if (_args.ReturnSequences) + { + output = outputs; + } + else + { + output = last_output; + } + + if (_args.ReturnState) + { + output = new Tensors { output, states }; + } + return output; + } + + private static IRnnCell CreateCell(GRUArgs gruArgs) + { + return new GRUCell(new GRUCellArgs + { + Units = gruArgs.Units, + Activation = gruArgs.Activation, + RecurrentActivation = gruArgs.RecurrentActivation, + UseBias = gruArgs.UseBias, + Dropout = gruArgs.Dropout, + RecurrentDropout = gruArgs.RecurrentDropout, + KernelInitializer = gruArgs.KernelInitializer, + RecurrentInitializer = gruArgs.RecurrentInitializer, + BiasInitializer = gruArgs.BiasInitializer, + ResetAfter = gruArgs.ResetAfter, + Implementation = gruArgs.Implementation + }); + } + + private static RNNArgs PreConstruct(GRUArgs args) + { + return new RNNArgs + { + ReturnSequences = args.ReturnSequences, + ReturnState = args.ReturnState, + GoBackwards = args.GoBackwards, + Stateful = args.Stateful, + Unroll = args.Unroll, + TimeMajor = args.TimeMajor, + Units = args.Units, + Activation = args.Activation, + RecurrentActivation = args.RecurrentActivation, + UseBias = args.UseBias, + Dropout = args.Dropout, + RecurrentDropout = args.RecurrentDropout, + KernelInitializer = args.KernelInitializer, + RecurrentInitializer = args.RecurrentInitializer, + BiasInitializer = args.BiasInitializer + }; + } + } +} diff --git a/src/TensorFlowNET.Keras/Layers/Rnn/RNN.cs b/src/TensorFlowNET.Keras/Layers/Rnn/RNN.cs index c19222614..fec75559c 100644 --- a/src/TensorFlowNET.Keras/Layers/Rnn/RNN.cs +++ b/src/TensorFlowNET.Keras/Layers/Rnn/RNN.cs @@ -25,8 +25,8 @@ public class RNN : RnnBase private RNNArgs _args; private object _input_spec = null; // or NoneValue?? 
private object _state_spec = null; - private Tensors _states = null; private object _constants_spec = null; + private Tensors _states = null; private int _num_constants; protected IVariableV1 _kernel; protected IVariableV1 _bias; @@ -469,7 +469,7 @@ public override Tensors Apply(Tensors inputs, Tensors initial_states = null, boo return (inputs, initial_state, constants); } - private void _validate_args_if_ragged(bool is_ragged_input, Tensors mask) + protected void _validate_args_if_ragged(bool is_ragged_input, Tensors mask) { if (!is_ragged_input) { @@ -528,44 +528,6 @@ public Tensors __call__(Tensors inputs, Tensor state = null, Tensor training = n throw new NotImplementedException(); } - // 好像不能cell不能传接口类型 - //public RNN New(IRnnArgCell cell, - // bool return_sequences = false, - // bool return_state = false, - // bool go_backwards = false, - // bool stateful = false, - // bool unroll = false, - // bool time_major = false) - // => new RNN(new RNNArgs - // { - // Cell = cell, - // ReturnSequences = return_sequences, - // ReturnState = return_state, - // GoBackwards = go_backwards, - // Stateful = stateful, - // Unroll = unroll, - // TimeMajor = time_major - // }); - - //public RNN New(List cell, - // bool return_sequences = false, - // bool return_state = false, - // bool go_backwards = false, - // bool stateful = false, - // bool unroll = false, - // bool time_major = false) - // => new RNN(new RNNArgs - // { - // Cell = cell, - // ReturnSequences = return_sequences, - // ReturnState = return_state, - // GoBackwards = go_backwards, - // Stateful = stateful, - // Unroll = unroll, - // TimeMajor = time_major - // }); - - protected Tensors get_initial_state(Tensors inputs) { var input = inputs[0]; diff --git a/test/TensorFlowNET.Keras.UnitTest/Layers/Rnn.Test.cs b/test/TensorFlowNET.Keras.UnitTest/Layers/Rnn.Test.cs index 03159346a..dbf5cae1e 100644 --- a/test/TensorFlowNET.Keras.UnitTest/Layers/Rnn.Test.cs +++ b/test/TensorFlowNET.Keras.UnitTest/Layers/Rnn.Test.cs @@ -146,6 +146,15 @@ public void GRUCell() } + [TestMethod] + public void GRU() + { + var inputs = tf.ones((32, 10, 8)); + var gru = tf.keras.layers.GRU(4); + var output = gru.Apply(inputs); + Assert.AreEqual((32, 4), output.shape); + } + [TestMethod] public void Bidirectional() { From 9d10daf30f02ebf078d56aadca59cc269ae23b4d Mon Sep 17 00:00:00 2001 From: lingbai-kong Date: Wed, 6 Sep 2023 23:12:00 +0800 Subject: [PATCH 08/77] add reconstruction and setstate of NDArray for loading pickled npy file. 
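Object-dtype entries in a .npy payload are stored pickled, so LoadMatrix now routes
them through Razorvine.Pickle with two registered object constructors. Condensed from
the ReadObjectMatrix path added below:

    // numpy pickles object arrays via multiarray._reconstruct plus a dtype object:
    Unpickler.registerConstructor("numpy.core.multiarray", "_reconstruct", new MultiArrayConstructor());
    Unpickler.registerConstructor("numpy", "dtype", new DtypeConstructor());
    var nd = (NDArray)new Unpickler().load(stream); // NDArray.__setstate__ rebuilds shape, dtype and data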
--- .../NumPy/DtypeConstructor.cs | 55 ++++++++--- .../Implementation/NumPyImpl.Creation.cs | 3 - .../NumPy/Implementation/NumPyImpl.load.cs | 24 ++--- .../NumPy/MultiArrayConstructor.cs | 35 ++++--- .../NumPy/NDArray.Pickle.cs | 99 ++++++++++++++++++- .../NumPy/NDArrayConverter.cs | 1 + src/TensorFlowNET.Core/Numpy/Numpy.cs | 4 +- src/TensorFlowNET.Keras/Datasets/Imdb.cs | 10 +- 8 files changed, 178 insertions(+), 53 deletions(-) diff --git a/src/TensorFlowNET.Core/NumPy/DtypeConstructor.cs b/src/TensorFlowNET.Core/NumPy/DtypeConstructor.cs index f84f408e1..30ef82df4 100644 --- a/src/TensorFlowNET.Core/NumPy/DtypeConstructor.cs +++ b/src/TensorFlowNET.Core/NumPy/DtypeConstructor.cs @@ -16,25 +16,50 @@ class DtypeConstructor : IObjectConstructor { public object construct(object[] args) { - Console.WriteLine("DtypeConstructor"); - Console.WriteLine(args.Length); - for (int i = 0; i < args.Length; i++) - { - Console.WriteLine(args[i]); - } - return new demo(); + var typeCode = (string)args[0]; + TF_DataType dtype; + if (typeCode == "b1") + dtype = np.@bool; + else if (typeCode == "i1") + dtype = np.@byte; + else if (typeCode == "i2") + dtype = np.int16; + else if (typeCode == "i4") + dtype = np.int32; + else if (typeCode == "i8") + dtype = np.int64; + else if (typeCode == "u1") + dtype = np.ubyte; + else if (typeCode == "u2") + dtype = np.uint16; + else if (typeCode == "u4") + dtype = np.uint32; + else if (typeCode == "u8") + dtype = np.uint64; + else if (typeCode == "f4") + dtype = np.float32; + else if (typeCode == "f8") + dtype = np.float64; + else if (typeCode.StartsWith("S")) + dtype = np.@string; + else if (typeCode.StartsWith("O")) + dtype = np.@object; + else + throw new NotSupportedException(); + return new TF_DataType_Warpper(dtype); } } - class demo + public class TF_DataType_Warpper { - public void __setstate__(object[] args) + TF_DataType dtype { get; set; } + public TF_DataType_Warpper(TF_DataType dtype) { - Console.WriteLine("demo __setstate__"); - Console.WriteLine(args.Length); - for (int i = 0; i < args.Length; i++) - { - Console.WriteLine(args[i]); - } + this.dtype = dtype; + } + public void __setstate__(object[] args) { } + public static implicit operator TF_DataType(TF_DataType_Warpper dtypeWarpper) + { + return dtypeWarpper.dtype; } } } diff --git a/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.Creation.cs b/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.Creation.cs index 80b62198a..7b79f83c6 100644 --- a/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.Creation.cs +++ b/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.Creation.cs @@ -99,9 +99,6 @@ Array ReadValueMatrix(BinaryReader reader, Array matrix, int bytes, Type type, i NDArray ReadObjectMatrix(BinaryReader reader, Array matrix, int[] shape) { - //int data = reader.ReadByte(); - //Console.WriteLine(data); - //Console.WriteLine(reader.ReadByte()); Stream stream = reader.BaseStream; Unpickler.registerConstructor("numpy.core.multiarray", "_reconstruct", new MultiArrayConstructor()); Unpickler.registerConstructor("numpy", "dtype", new DtypeConstructor()); diff --git a/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.load.cs b/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.load.cs index 789f119a1..bbe48e6a4 100644 --- a/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.load.cs +++ b/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.load.cs @@ -28,17 +28,17 @@ public Array LoadMatrix(Stream stream) //if (type == typeof(String)) //return ReadStringMatrix(reader, matrix, 
bytes, type, shape); - NDArray res = ReadObjectMatrix(reader, matrix, shape); - Console.WriteLine("LoadMatrix"); - Console.WriteLine(res.dims[0]); - Console.WriteLine((int)res[0][0]); - Console.WriteLine(res.dims[1]); - //if (type == typeof(Object)) - //{ - - //} - //else - return ReadValueMatrix(reader, matrix, bytes, type, shape); + + if (type == typeof(Object)) + { + NDArray res = ReadObjectMatrix(reader, matrix, shape); + // res = res.reconstructedNDArray; + return res.reconstructedArray; + } + else + { + return ReadValueMatrix(reader, matrix, bytes, type, shape); + } } } @@ -133,7 +133,7 @@ Type GetType(string dtype, out int bytes, out bool? isLittleEndian) return typeof(Double); if (typeCode.StartsWith("S")) return typeof(String); - if (typeCode == "O") + if (typeCode.StartsWith("O")) return typeof(Object); throw new NotSupportedException(); diff --git a/src/TensorFlowNET.Core/NumPy/MultiArrayConstructor.cs b/src/TensorFlowNET.Core/NumPy/MultiArrayConstructor.cs index 92927cd5a..43eda23e0 100644 --- a/src/TensorFlowNET.Core/NumPy/MultiArrayConstructor.cs +++ b/src/TensorFlowNET.Core/NumPy/MultiArrayConstructor.cs @@ -3,6 +3,7 @@ using System.Diagnostics.CodeAnalysis; using System.Text; using Razorvine.Pickle; +using Razorvine.Pickle.Objects; namespace Tensorflow.NumPy { @@ -17,28 +18,36 @@ public class MultiArrayConstructor : IObjectConstructor { public object construct(object[] args) { - //Console.WriteLine(args.Length); - //for (int i = 0; i < args.Length; i++) - //{ - // Console.WriteLine(args[i]); - //} - Console.WriteLine("MultiArrayConstructor"); - + if (args.Length != 3) + throw new InvalidArgumentError($"Invalid number of arguments in MultiArrayConstructor._reconstruct. Expected three arguments. Given {args.Length} arguments."); + + var types = (ClassDictConstructor)args[0]; + if (types.module != "numpy" || types.name != "ndarray") + throw new RuntimeError("_reconstruct: First argument must be a sub-type of ndarray"); + var arg1 = (Object[])args[1]; var dims = new int[arg1.Length]; for (var i = 0; i < arg1.Length; i++) { dims[i] = (int)arg1[i]; } + var shape = new Shape(dims); - var dtype = TF_DataType.DtInvalid; - switch (args[2]) + TF_DataType dtype; + string identifier; + if (args[2].GetType() == typeof(string)) + identifier = (string)args[2]; + else + identifier = Encoding.UTF8.GetString((byte[])args[2]); + switch (identifier) { - case "b": dtype = TF_DataType.DtUint8Ref; break; - default: throw new NotImplementedException("cannot parse" + args[2]); + case "u": dtype = np.uint32; break; + case "c": dtype = np.complex_; break; + case "f": dtype = np.float32; break; + case "b": dtype = np.@bool; break; + default: throw new NotImplementedException($"Unsupported data type: {args[2]}"); } - return new NDArray(new Shape(dims), dtype); - + return new NDArray(shape, dtype); } } } diff --git a/src/TensorFlowNET.Core/NumPy/NDArray.Pickle.cs b/src/TensorFlowNET.Core/NumPy/NDArray.Pickle.cs index b4d66243a..62720826a 100644 --- a/src/TensorFlowNET.Core/NumPy/NDArray.Pickle.cs +++ b/src/TensorFlowNET.Core/NumPy/NDArray.Pickle.cs @@ -1,4 +1,7 @@ -using System; +using Newtonsoft.Json.Linq; +using Serilog.Debugging; +using System; +using System.Collections; using System.Collections.Generic; using System.Text; @@ -6,14 +9,100 @@ namespace Tensorflow.NumPy { public partial class NDArray { + public NDArray reconstructedNDArray { get; set; } + public Array reconstructedArray { get; set; } public void __setstate__(object[] args) { - Console.WriteLine("NDArray __setstate__"); - 
Console.WriteLine(args.Length); - for (int i = 0; i < args.Length; i++) + if (args.Length != 5) + throw new InvalidArgumentError($"Invalid number of arguments in NDArray.__setstate__. Expected five arguments. Given {args.Length} arguments."); + + var version = (int)args[0]; // version + + var arg1 = (Object[])args[1]; + var dims = new int[arg1.Length]; + for (var i = 0; i < arg1.Length; i++) + { + dims[i] = (int)arg1[i]; + } + var _ShapeLike = new Shape(dims); // shape + + TF_DataType _DType_co = (TF_DataType_Warpper)args[2]; // DType + + var F_continuous = (bool)args[3]; // F-continuous + if (F_continuous) + throw new InvalidArgumentError("Fortran Continuous memory layout is not supported. Please use C-continuous layout or check the data format."); + + var data = args[4]; // Data + /* + * If we ever need another pickle format, increment the version + * number. But we should still be able to handle the old versions. + */ + if (version < 0 || version > 4) + throw new ValueError($"can't handle version {version} of numpy.dtype pickle"); + + // TODO: Implement the missing details and checks from the official Numpy C code here. + // https://github.com/numpy/numpy/blob/2f0bd6e86a77e4401d0384d9a75edf9470c5deb6/numpy/core/src/multiarray/descriptor.c#L2761 + + if (data.GetType() == typeof(ArrayList)) + { + SetState((ArrayList)data); + } + else + throw new NotImplementedException(""); + } + private void SetState(ArrayList arrayList) + { + int ndim = 1; + var subArrayList = arrayList; + while (subArrayList.Count > 0 && subArrayList[0] != null && subArrayList[0].GetType() == typeof(ArrayList)) + { + subArrayList = (ArrayList)subArrayList[0]; + ndim += 1; + } + var type = subArrayList[0].GetType(); + if (type == typeof(int)) { - Console.WriteLine(args[i]); + if (ndim == 1) + { + int[] list = (int[])arrayList.ToArray(typeof(int)); + Shape shape = new Shape(new int[] { arrayList.Count }); + reconstructedArray = list; + reconstructedNDArray = new NDArray(list, shape); + //SetData(new[] { new Slice() }, new NDArray(list, shape)); + //set_shape(shape); + } + if (ndim == 2) + { + int secondDim = 0; + foreach (ArrayList subArray in arrayList) + { + secondDim = subArray.Count > secondDim ? 
subArray.Count : secondDim; + } + int[,] list = new int[arrayList.Count, secondDim]; + for (int i = 0; i < arrayList.Count; i++) + { + var subArray = (ArrayList?)arrayList[i]; + if (subArray == null) + throw new NullReferenceException(""); + for (int j = 0; j < subArray.Count; j++) + { + var element = subArray[j]; + if (element == null) + throw new NoNullAllowedException("the element of ArrayList cannot be null."); + list[i,j] = (int) element; + } + } + Shape shape = new Shape(new int[] { arrayList.Count, secondDim }); + reconstructedArray = list; + reconstructedNDArray = new NDArray(list, shape); + //SetData(new[] { new Slice() }, new NDArray(list, shape)); + //set_shape(shape); + } + if (ndim > 2) + throw new NotImplementedException("can't handle ArrayList with more than two dimensions."); } + else + throw new NotImplementedException(""); } } } diff --git a/src/TensorFlowNET.Core/NumPy/NDArrayConverter.cs b/src/TensorFlowNET.Core/NumPy/NDArrayConverter.cs index c8c2d45fa..4c64eba74 100644 --- a/src/TensorFlowNET.Core/NumPy/NDArrayConverter.cs +++ b/src/TensorFlowNET.Core/NumPy/NDArrayConverter.cs @@ -10,6 +10,7 @@ public class NDArrayConverter public unsafe static T Scalar(NDArray nd) where T : unmanaged => nd.dtype switch { + TF_DataType.TF_BOOL => Scalar(*(bool*)nd.data), TF_DataType.TF_UINT8 => Scalar(*(byte*)nd.data), TF_DataType.TF_FLOAT => Scalar(*(float*)nd.data), TF_DataType.TF_INT32 => Scalar(*(int*)nd.data), diff --git a/src/TensorFlowNET.Core/Numpy/Numpy.cs b/src/TensorFlowNET.Core/Numpy/Numpy.cs index 72d2e981c..fee2d63fc 100644 --- a/src/TensorFlowNET.Core/Numpy/Numpy.cs +++ b/src/TensorFlowNET.Core/Numpy/Numpy.cs @@ -43,7 +43,9 @@ public partial class np public static readonly TF_DataType @decimal = TF_DataType.TF_DOUBLE; public static readonly TF_DataType complex_ = TF_DataType.TF_COMPLEX; public static readonly TF_DataType complex64 = TF_DataType.TF_COMPLEX64; - public static readonly TF_DataType complex128 = TF_DataType.TF_COMPLEX128; + public static readonly TF_DataType complex128 = TF_DataType.TF_COMPLEX128; + public static readonly TF_DataType @string = TF_DataType.TF_STRING; + public static readonly TF_DataType @object = TF_DataType.TF_VARIANT; #endregion public static double nan => double.NaN; diff --git a/src/TensorFlowNET.Keras/Datasets/Imdb.cs b/src/TensorFlowNET.Keras/Datasets/Imdb.cs index 016b352d9..6808035c6 100644 --- a/src/TensorFlowNET.Keras/Datasets/Imdb.cs +++ b/src/TensorFlowNET.Keras/Datasets/Imdb.cs @@ -70,7 +70,7 @@ namespace Tensorflow.Keras.Datasets public class Imdb { string origin_folder = "https://storage.googleapis.com/tensorflow/tf-keras-datasets/"; - string file_name = "imdb.npz"; + string file_name = "simple.npz"; string dest_folder = "imdb"; /// /// Loads the [IMDB dataset](https://ai.stanford.edu/~amaas/data/sentiment/). 
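/// A minimal usage sketch of this loader (hedged: the num_words/maxlen values
/// are illustrative only, and the Train/Test tuple access mirrors the unit
/// tests later in this series rather than a documented contract):
///
///     var dataset = keras.datasets.imdb.load_data(num_words: 20000, maxlen: 200);
///     var (x_train, y_train) = (dataset.Train.Item1, dataset.Train.Item2);
///     var (x_test, y_test) = (dataset.Test.Item1, dataset.Test.Item2);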
@@ -128,13 +128,15 @@ public DatasetPass load_data(string path = "imdb.npz", (NDArray, NDArray) LoadX(byte[] bytes) { - var y = np.Load_Npz(bytes); - return (y["x_train.npy"], y["x_test.npy"]); + var y = np.Load_Npz(bytes); + var x_train = y["x_train.npy"]; + var x_test = y["x_test.npy"]; + return (x_train, x_test); } (NDArray, NDArray) LoadY(byte[] bytes) { - var y = np.Load_Npz(bytes); + var y = np.Load_Npz(bytes); return (y["y_train.npy"], y["y_test.npy"]); } From ea978bbf214a75ead94c568755255a6f3c6fed58 Mon Sep 17 00:00:00 2001 From: lingbai-kong Date: Thu, 7 Sep 2023 21:33:29 +0800 Subject: [PATCH 09/77] optimize code structure of reconstruction ndarray from pickled npy file --- .../Implementation/NumPyImpl.Creation.cs | 12 ++---- .../NumPy/Implementation/NumPyImpl.load.cs | 10 +---- .../NumPy/Pickle/DTypePickleWarpper.cs | 20 ++++++++++ .../NumPy/{ => Pickle}/DtypeConstructor.cs | 17 +------- .../{ => Pickle}/MultiArrayConstructor.cs | 14 +++---- .../MultiArrayPickleWarpper.cs} | 39 ++++++++++++------- src/TensorFlowNET.Core/tensorflow.cs | 6 +++ src/TensorFlowNET.Keras/Datasets/Imdb.cs | 19 +++------ .../Dataset/DatasetTest.cs | 6 +-- 9 files changed, 75 insertions(+), 68 deletions(-) create mode 100644 src/TensorFlowNET.Core/NumPy/Pickle/DTypePickleWarpper.cs rename src/TensorFlowNET.Core/NumPy/{ => Pickle}/DtypeConstructor.cs (77%) rename src/TensorFlowNET.Core/NumPy/{ => Pickle}/MultiArrayConstructor.cs (91%) rename src/TensorFlowNET.Core/NumPy/{NDArray.Pickle.cs => Pickle/MultiArrayPickleWarpper.cs} (77%) diff --git a/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.Creation.cs b/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.Creation.cs index 7b79f83c6..fa4ef0191 100644 --- a/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.Creation.cs +++ b/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.Creation.cs @@ -5,6 +5,7 @@ using System.Text; using Tensorflow.Util; using Razorvine.Pickle; +using Tensorflow.NumPy.Pickle; using static Tensorflow.Binding; namespace Tensorflow.NumPy @@ -94,20 +95,15 @@ Array ReadValueMatrix(BinaryReader reader, Array matrix, int bytes, Type type, i var buffer = reader.ReadBytes(bytes * total); System.Buffer.BlockCopy(buffer, 0, matrix, 0, buffer.Length); + return matrix; } - NDArray ReadObjectMatrix(BinaryReader reader, Array matrix, int[] shape) + Array ReadObjectMatrix(BinaryReader reader, Array matrix, int[] shape) { Stream stream = reader.BaseStream; - Unpickler.registerConstructor("numpy.core.multiarray", "_reconstruct", new MultiArrayConstructor()); - Unpickler.registerConstructor("numpy", "dtype", new DtypeConstructor()); - var unpickler = new Unpickler(); - - NDArray result = (NDArray) unpickler.load(stream); - Console.WriteLine(result.dims); - return result; + return (MultiArrayPickleWarpper)unpickler.load(stream); } public (NDArray, NDArray) meshgrid(T[] array, bool copy = true, bool sparse = false) diff --git a/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.load.cs b/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.load.cs index bbe48e6a4..199e5ced3 100644 --- a/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.load.cs +++ b/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.load.cs @@ -30,17 +30,12 @@ public Array LoadMatrix(Stream stream) //return ReadStringMatrix(reader, matrix, bytes, type, shape); if (type == typeof(Object)) - { - NDArray res = ReadObjectMatrix(reader, matrix, shape); - // res = res.reconstructedNDArray; - return res.reconstructedArray; - } + return ReadObjectMatrix(reader, matrix, 
shape); else { return ReadValueMatrix(reader, matrix, bytes, type, shape); } } - } public T Load(Stream stream) @@ -59,7 +54,7 @@ bool ParseReader(BinaryReader reader, out int bytes, out Type t, out int[] shape shape = null; // The first 6 bytes are a magic string: exactly "x93NUMPY" - if (reader.ReadByte() != 0x93) return false; + if (reader.ReadChar() != 63) return false; if (reader.ReadChar() != 'N') return false; if (reader.ReadChar() != 'U') return false; if (reader.ReadChar() != 'M') return false; @@ -75,7 +70,6 @@ bool ParseReader(BinaryReader reader, out int bytes, out Type t, out int[] shape ushort len = reader.ReadUInt16(); string header = new String(reader.ReadChars(len)); - Console.WriteLine(header); string mark = "'descr': '"; int s = header.IndexOf(mark) + mark.Length; int e = header.IndexOf("'", s + 1); diff --git a/src/TensorFlowNET.Core/NumPy/Pickle/DTypePickleWarpper.cs b/src/TensorFlowNET.Core/NumPy/Pickle/DTypePickleWarpper.cs new file mode 100644 index 000000000..5dff6c16b --- /dev/null +++ b/src/TensorFlowNET.Core/NumPy/Pickle/DTypePickleWarpper.cs @@ -0,0 +1,20 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.NumPy.Pickle +{ + public class DTypePickleWarpper + { + TF_DataType dtype { get; set; } + public DTypePickleWarpper(TF_DataType dtype) + { + this.dtype = dtype; + } + public void __setstate__(object[] args) { } + public static implicit operator TF_DataType(DTypePickleWarpper dTypeWarpper) + { + return dTypeWarpper.dtype; + } + } +} diff --git a/src/TensorFlowNET.Core/NumPy/DtypeConstructor.cs b/src/TensorFlowNET.Core/NumPy/Pickle/DtypeConstructor.cs similarity index 77% rename from src/TensorFlowNET.Core/NumPy/DtypeConstructor.cs rename to src/TensorFlowNET.Core/NumPy/Pickle/DtypeConstructor.cs index 30ef82df4..160c7d4e9 100644 --- a/src/TensorFlowNET.Core/NumPy/DtypeConstructor.cs +++ b/src/TensorFlowNET.Core/NumPy/Pickle/DtypeConstructor.cs @@ -4,7 +4,7 @@ using System.Text; using Razorvine.Pickle; -namespace Tensorflow.NumPy +namespace Tensorflow.NumPy.Pickle { /// /// @@ -46,20 +46,7 @@ public object construct(object[] args) dtype = np.@object; else throw new NotSupportedException(); - return new TF_DataType_Warpper(dtype); - } - } - public class TF_DataType_Warpper - { - TF_DataType dtype { get; set; } - public TF_DataType_Warpper(TF_DataType dtype) - { - this.dtype = dtype; - } - public void __setstate__(object[] args) { } - public static implicit operator TF_DataType(TF_DataType_Warpper dtypeWarpper) - { - return dtypeWarpper.dtype; + return new DTypePickleWarpper(dtype); } } } diff --git a/src/TensorFlowNET.Core/NumPy/MultiArrayConstructor.cs b/src/TensorFlowNET.Core/NumPy/Pickle/MultiArrayConstructor.cs similarity index 91% rename from src/TensorFlowNET.Core/NumPy/MultiArrayConstructor.cs rename to src/TensorFlowNET.Core/NumPy/Pickle/MultiArrayConstructor.cs index 43eda23e0..885f368c4 100644 --- a/src/TensorFlowNET.Core/NumPy/MultiArrayConstructor.cs +++ b/src/TensorFlowNET.Core/NumPy/Pickle/MultiArrayConstructor.cs @@ -5,7 +5,7 @@ using Razorvine.Pickle; using Razorvine.Pickle.Objects; -namespace Tensorflow.NumPy +namespace Tensorflow.NumPy.Pickle { /// /// Creates multiarrays of objects. Returns a primitive type multiarray such as int[][] if @@ -18,14 +18,14 @@ public class MultiArrayConstructor : IObjectConstructor { public object construct(object[] args) { - if (args.Length != 3) + if (args.Length != 3) throw new InvalidArgumentError($"Invalid number of arguments in MultiArrayConstructor._reconstruct. 
Expected three arguments. Given {args.Length} arguments."); - + var types = (ClassDictConstructor)args[0]; - if (types.module != "numpy" || types.name != "ndarray") + if (types.module != "numpy" || types.name != "ndarray") throw new RuntimeError("_reconstruct: First argument must be a sub-type of ndarray"); - - var arg1 = (Object[])args[1]; + + var arg1 = (object[])args[1]; var dims = new int[arg1.Length]; for (var i = 0; i < arg1.Length; i++) { @@ -47,7 +47,7 @@ public object construct(object[] args) case "b": dtype = np.@bool; break; default: throw new NotImplementedException($"Unsupported data type: {args[2]}"); } - return new NDArray(shape, dtype); + return new MultiArrayPickleWarpper(shape, dtype); } } } diff --git a/src/TensorFlowNET.Core/NumPy/NDArray.Pickle.cs b/src/TensorFlowNET.Core/NumPy/Pickle/MultiArrayPickleWarpper.cs similarity index 77% rename from src/TensorFlowNET.Core/NumPy/NDArray.Pickle.cs rename to src/TensorFlowNET.Core/NumPy/Pickle/MultiArrayPickleWarpper.cs index 62720826a..af8d1ecc2 100644 --- a/src/TensorFlowNET.Core/NumPy/NDArray.Pickle.cs +++ b/src/TensorFlowNET.Core/NumPy/Pickle/MultiArrayPickleWarpper.cs @@ -5,12 +5,19 @@ using System.Collections.Generic; using System.Text; -namespace Tensorflow.NumPy +namespace Tensorflow.NumPy.Pickle { - public partial class NDArray + public class MultiArrayPickleWarpper { + public Shape reconstructedShape { get; set; } + public TF_DataType reconstructedDType { get; set; } public NDArray reconstructedNDArray { get; set; } - public Array reconstructedArray { get; set; } + public Array reconstructedMultiArray { get; set; } + public MultiArrayPickleWarpper(Shape shape, TF_DataType dtype) + { + reconstructedShape = shape; + reconstructedDType = dtype; + } public void __setstate__(object[] args) { if (args.Length != 5) @@ -18,7 +25,7 @@ public void __setstate__(object[] args) var version = (int)args[0]; // version - var arg1 = (Object[])args[1]; + var arg1 = (object[])args[1]; var dims = new int[arg1.Length]; for (var i = 0; i < arg1.Length; i++) { @@ -26,7 +33,7 @@ public void __setstate__(object[] args) } var _ShapeLike = new Shape(dims); // shape - TF_DataType _DType_co = (TF_DataType_Warpper)args[2]; // DType + TF_DataType _DType_co = (DTypePickleWarpper)args[2]; // DType var F_continuous = (bool)args[3]; // F-continuous if (F_continuous) @@ -45,12 +52,12 @@ public void __setstate__(object[] args) if (data.GetType() == typeof(ArrayList)) { - SetState((ArrayList)data); + Reconstruct((ArrayList)data); } else throw new NotImplementedException(""); } - private void SetState(ArrayList arrayList) + private void Reconstruct(ArrayList arrayList) { int ndim = 1; var subArrayList = arrayList; @@ -66,10 +73,8 @@ private void SetState(ArrayList arrayList) { int[] list = (int[])arrayList.ToArray(typeof(int)); Shape shape = new Shape(new int[] { arrayList.Count }); - reconstructedArray = list; + reconstructedMultiArray = list; reconstructedNDArray = new NDArray(list, shape); - //SetData(new[] { new Slice() }, new NDArray(list, shape)); - //set_shape(shape); } if (ndim == 2) { @@ -89,14 +94,12 @@ private void SetState(ArrayList arrayList) var element = subArray[j]; if (element == null) throw new NoNullAllowedException("the element of ArrayList cannot be null."); - list[i,j] = (int) element; + list[i, j] = (int)element; } } Shape shape = new Shape(new int[] { arrayList.Count, secondDim }); - reconstructedArray = list; + reconstructedMultiArray = list; reconstructedNDArray = new NDArray(list, shape); - //SetData(new[] { new Slice() }, new 
NDArray(list, shape)); - //set_shape(shape); } if (ndim > 2) throw new NotImplementedException("can't handle ArrayList with more than two dimensions."); @@ -104,5 +107,13 @@ private void SetState(ArrayList arrayList) else throw new NotImplementedException(""); } + public static implicit operator Array(MultiArrayPickleWarpper arrayWarpper) + { + return arrayWarpper.reconstructedMultiArray; + } + public static implicit operator NDArray(MultiArrayPickleWarpper arrayWarpper) + { + return arrayWarpper.reconstructedNDArray; + } } } diff --git a/src/TensorFlowNET.Core/tensorflow.cs b/src/TensorFlowNET.Core/tensorflow.cs index dc4e48da8..e368b37cd 100644 --- a/src/TensorFlowNET.Core/tensorflow.cs +++ b/src/TensorFlowNET.Core/tensorflow.cs @@ -14,6 +14,7 @@ You may obtain a copy of the License at limitations under the License. ******************************************************************************/ +using Razorvine.Pickle; using Serilog; using Serilog.Core; using System.Reflection; @@ -22,6 +23,7 @@ limitations under the License. using Tensorflow.Eager; using Tensorflow.Gradients; using Tensorflow.Keras; +using Tensorflow.NumPy.Pickle; namespace Tensorflow { @@ -98,6 +100,10 @@ public tensorflow() "please visit https://github.com/SciSharp/TensorFlow.NET. If it still not work after installing the backend, please submit an " + "issue to https://github.com/SciSharp/TensorFlow.NET/issues"); } + + // register numpy reconstructor for pickle + Unpickler.registerConstructor("numpy.core.multiarray", "_reconstruct", new MultiArrayConstructor()); + Unpickler.registerConstructor("numpy", "dtype", new DtypeConstructor()); } public string VERSION => c_api.StringPiece(c_api.TF_Version()); diff --git a/src/TensorFlowNET.Keras/Datasets/Imdb.cs b/src/TensorFlowNET.Keras/Datasets/Imdb.cs index 6808035c6..a992ae84a 100644 --- a/src/TensorFlowNET.Keras/Datasets/Imdb.cs +++ b/src/TensorFlowNET.Keras/Datasets/Imdb.cs @@ -5,13 +5,6 @@ using Tensorflow.Keras.Utils; using Tensorflow.NumPy; using System.Linq; -using Google.Protobuf.Collections; -using Microsoft.VisualBasic; -using OneOf.Types; -using static HDF.PInvoke.H5; -using System.Data; -using System.Reflection.Emit; -using System.Xml.Linq; namespace Tensorflow.Keras.Datasets { @@ -70,8 +63,9 @@ namespace Tensorflow.Keras.Datasets public class Imdb { string origin_folder = "https://storage.googleapis.com/tensorflow/tf-keras-datasets/"; - string file_name = "simple.npz"; + string file_name = "imdb.npz"; string dest_folder = "imdb"; + /// /// Loads the [IMDB dataset](https://ai.stanford.edu/~amaas/data/sentiment/). 
/// @@ -95,8 +89,9 @@ public DatasetPass load_data(string path = "imdb.npz", { var dst = Download(); var fileBytes = File.ReadAllBytes(Path.Combine(dst, file_name)); - var (x_train, x_test) = LoadX(fileBytes); var (y_train, y_test) = LoadY(fileBytes); + var (x_train, x_test) = LoadX(fileBytes); + /*var lines = File.ReadAllLines(Path.Combine(dst, "imdb_train.txt")); var x_train_string = new string[lines.Length]; var y_train = np.zeros(new int[] { lines.Length }, np.int64); @@ -129,14 +124,12 @@ public DatasetPass load_data(string path = "imdb.npz", (NDArray, NDArray) LoadX(byte[] bytes) { var y = np.Load_Npz(bytes); - var x_train = y["x_train.npy"]; - var x_test = y["x_test.npy"]; - return (x_train, x_test); + return (y["x_train.npy"], y["x_test.npy"]); } (NDArray, NDArray) LoadY(byte[] bytes) { - var y = np.Load_Npz(bytes); + var y = np.Load_Npz(bytes); return (y["y_train.npy"], y["y_test.npy"]); } diff --git a/test/TensorFlowNET.UnitTest/Dataset/DatasetTest.cs b/test/TensorFlowNET.UnitTest/Dataset/DatasetTest.cs index 778290bb8..db6252efc 100644 --- a/test/TensorFlowNET.UnitTest/Dataset/DatasetTest.cs +++ b/test/TensorFlowNET.UnitTest/Dataset/DatasetTest.cs @@ -1,6 +1,5 @@ using Microsoft.VisualStudio.TestTools.UnitTesting; using System; -using System.Collections.Generic; using System.Linq; using static Tensorflow.Binding; using static Tensorflow.KerasApi; @@ -197,6 +196,7 @@ public void Shuffle() Assert.IsFalse(allEqual); } + [Ignore] [TestMethod] public void GetData() { @@ -209,8 +209,8 @@ public void GetData() var y_val = dataset.Test.Item2; print(len(x_train) + "Training sequences"); print(len(x_val) + "Validation sequences"); - x_train = keras.preprocessing.sequence.pad_sequences((IEnumerable)x_train, maxlen: maxlen); - x_val = keras.preprocessing.sequence.pad_sequences((IEnumerable)x_val, maxlen: maxlen); + //x_train = keras.preprocessing.sequence.pad_sequences((IEnumerable)x_train, maxlen: maxlen); + //x_val = keras.preprocessing.sequence.pad_sequences((IEnumerable)x_val, maxlen: maxlen); } } } From 28c77f53d64dbe78284bf46b00c8c945d76fb31c Mon Sep 17 00:00:00 2001 From: lingbai-kong Date: Fri, 8 Sep 2023 17:38:54 +0800 Subject: [PATCH 10/77] implement Imdb dataset loader --- .../NumPy/Implementation/RandomizedImpl.cs | 4 +- src/TensorFlowNET.Keras/Datasets/Imdb.cs | 186 ++++++++++++------ src/TensorFlowNET.Keras/Utils/data_utils.cs | 47 +++++ .../Dataset/DatasetTest.cs | 28 ++- 4 files changed, 198 insertions(+), 67 deletions(-) diff --git a/src/TensorFlowNET.Core/NumPy/Implementation/RandomizedImpl.cs b/src/TensorFlowNET.Core/NumPy/Implementation/RandomizedImpl.cs index 064c7362f..a707e8aae 100644 --- a/src/TensorFlowNET.Core/NumPy/Implementation/RandomizedImpl.cs +++ b/src/TensorFlowNET.Core/NumPy/Implementation/RandomizedImpl.cs @@ -14,9 +14,9 @@ public class RandomizedImpl public NDArray permutation(NDArray x) => new NDArray(random_ops.random_shuffle(x)); [AutoNumPy] - public void shuffle(NDArray x) + public void shuffle(NDArray x, int? 
seed = null) { - var y = random_ops.random_shuffle(x); + var y = random_ops.random_shuffle(x, seed); Marshal.Copy(y.BufferToArray(), 0, x.TensorDataPointer, (int)x.bytesize); } diff --git a/src/TensorFlowNET.Keras/Datasets/Imdb.cs b/src/TensorFlowNET.Keras/Datasets/Imdb.cs index 68364ea67..0266b48bd 100644 --- a/src/TensorFlowNET.Keras/Datasets/Imdb.cs +++ b/src/TensorFlowNET.Keras/Datasets/Imdb.cs @@ -3,8 +3,6 @@ using System.IO; using System.Text; using Tensorflow.Keras.Utils; -using Tensorflow.NumPy; -using System.Linq; namespace Tensorflow.Keras.Datasets { @@ -41,14 +39,14 @@ namespace Tensorflow.Keras.Datasets /// `skip_top` limits will be replaced with this character. /// index_from: int. Index actual words with this index and higher. /// Returns: - /// Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. + /// Tuple of Numpy arrays: `(x_train, labels_train), (x_test, labels_test)`. /// /// ** x_train, x_test**: lists of sequences, which are lists of indexes /// (integers). If the num_words argument was specific, the maximum /// possible index value is `num_words - 1`. If the `maxlen` argument was /// specified, the largest possible sequence length is `maxlen`. /// - /// ** y_train, y_test**: lists of integer labels(1 or 0). + /// ** labels_train, labels_test**: lists of integer labels(1 or 0). /// /// Raises: /// ValueError: in case `maxlen` is so low @@ -63,7 +61,6 @@ namespace Tensorflow.Keras.Datasets public class Imdb { string origin_folder = "https://storage.googleapis.com/tensorflow/tf-keras-datasets/"; - string file_name = "imdb.npz"; string dest_folder = "imdb"; /// @@ -78,43 +75,139 @@ public class Imdb /// /// /// - public DatasetPass load_data(string? path = "imdb.npz", - int num_words = -1, + public DatasetPass load_data( + string path = "imdb.npz", + int? num_words = null, int skip_top = 0, - int maxlen = -1, + int? maxlen = null, int seed = 113, - int start_char = 1, - int oov_char= 2, + int? start_char = 1, + int? oov_char = 2, int index_from = 3) { - if (maxlen == -1) throw new InvalidArgumentError("maxlen must be assigned."); - - var dst = path ?? 
Download(); - var fileBytes = File.ReadAllBytes(Path.Combine(dst, file_name)); - var (y_train, y_test) = LoadY(fileBytes); + path = data_utils.get_file( + path, + origin: Path.Combine(origin_folder, "imdb.npz"), + file_hash: "69664113be75683a8fe16e3ed0ab59fda8886cb3cd7ada244f7d9544e4676b9f" + ); + path = Path.Combine(path, "imdb.npz"); + var fileBytes = File.ReadAllBytes(path); var (x_train, x_test) = LoadX(fileBytes); - - /*var lines = File.ReadAllLines(Path.Combine(dst, "imdb_train.txt")); - var x_train_string = new string[lines.Length]; - var y_train = np.zeros(new int[] { lines.Length }, np.int64); - for (int i = 0; i < lines.Length; i++) + var (labels_train, labels_test) = LoadY(fileBytes); + x_test.astype(np.int32); + labels_test.astype(np.int32); + + var indices = np.arange(len(x_train)); + np.random.shuffle(indices, seed); + x_train = x_train[indices]; + labels_train = labels_train[indices]; + + indices = np.arange(len(x_test)); + np.random.shuffle(indices, seed); + x_test = x_test[indices]; + labels_test = labels_test[indices]; + + if (start_char != null) + { + int[,] new_x_train = new int[x_train.shape[0], x_train.shape[1] + 1]; + for (var i = 0; i < x_train.shape[0]; i++) + { + new_x_train[i, 0] = (int)start_char; + for (var j = 0; j < x_train.shape[1]; j++) + { + new_x_train[i, j + 1] = x_train[i][j]; + } + } + int[,] new_x_test = new int[x_test.shape[0], x_test.shape[1] + 1]; + for (var i = 0; i < x_test.shape[0]; i++) + { + new_x_test[i, 0] = (int)start_char; + for (var j = 0; j < x_test.shape[1]; j++) + { + new_x_test[i, j + 1] = x_test[i][j]; + } + } + x_train = new NDArray(new_x_train); + x_test = new NDArray(new_x_test); + } + else if (index_from != 0) + { + for (var i = 0; i < x_train.shape[0]; i++) + { + for (var j = 0; j < x_train.shape[1]; j++) + { + if (x_train[i, j] != 0) + x_train[i, j] += index_from; + } + } + for (var i = 0; i < x_test.shape[0]; i++) + { + for (var j = 0; j < x_test.shape[1]; j++) + { + if (x_test[i, j] != 0) + x_test[i, j] += index_from; + } + } + } + + if (maxlen != null) { - y_train[i] = long.Parse(lines[i].Substring(0, 1)); - x_train_string[i] = lines[i].Substring(2); + (x_train, labels_train) = data_utils._remove_long_seq((int)maxlen, x_train, labels_train); + (x_test, labels_test) = data_utils._remove_long_seq((int)maxlen, x_test, labels_test); + if (x_train.size == 0 || x_test.size == 0) + throw new ValueError("After filtering for sequences shorter than maxlen=" + + $"{maxlen}, no sequence was kept. 
Increase maxlen."); } - var x_train = keras.preprocessing.sequence.pad_sequences(PraseData(x_train_string), maxlen: maxlen); + var xs = np.concatenate(new[] { x_train, x_test }); + var labels = np.concatenate(new[] { labels_train, labels_test }); - lines = File.ReadAllLines(Path.Combine(dst, "imdb_test.txt")); - var x_test_string = new string[lines.Length]; - var y_test = np.zeros(new int[] { lines.Length }, np.int64); - for (int i = 0; i < lines.Length; i++) + if(num_words == null) { - y_test[i] = long.Parse(lines[i].Substring(0, 1)); - x_test_string[i] = lines[i].Substring(2); + num_words = 0; + for (var i = 0; i < xs.shape[0]; i++) + for (var j = 0; j < xs.shape[1]; j++) + num_words = max((int)num_words, (int)xs[i][j]); } - var x_test = np.array(x_test_string);*/ + // by convention, use 2 as OOV word + // reserve 'index_from' (=3 by default) characters: + // 0 (padding), 1 (start), 2 (OOV) + if (oov_char != null) + { + int[,] new_xs = new int[xs.shape[0], xs.shape[1]]; + for(var i = 0; i < xs.shape[0]; i++) + { + for(var j = 0; j < xs.shape[1]; j++) + { + if ((int)xs[i][j] == 0 || skip_top <= (int)xs[i][j] && (int)xs[i][j] < num_words) + new_xs[i, j] = (int)xs[i][j]; + else + new_xs[i, j] = (int)oov_char; + } + } + xs = new NDArray(new_xs); + } + else + { + int[,] new_xs = new int[xs.shape[0], xs.shape[1]]; + for (var i = 0; i < xs.shape[0]; i++) + { + int k = 0; + for (var j = 0; j < xs.shape[1]; j++) + { + if ((int)xs[i][j] == 0 || skip_top <= (int)xs[i][j] && (int)xs[i][j] < num_words) + new_xs[i, k++] = (int)xs[i][j]; + } + } + xs = new NDArray(new_xs); + } + + var idx = len(x_train); + x_train = xs[$"0:{idx}"]; + x_test = xs[$"{idx}:"]; + var y_train = labels[$"0:{idx}"]; + var y_test = labels[$"{idx}:"]; return new DatasetPass { @@ -125,8 +218,8 @@ public DatasetPass load_data(string? path = "imdb.npz", (NDArray, NDArray) LoadX(byte[] bytes) { - var y = np.Load_Npz(bytes); - return (y["x_train.npy"], y["x_test.npy"]); + var x = np.Load_Npz(bytes); + return (x["x_train.npy"], x["x_test.npy"]); } (NDArray, NDArray) LoadY(byte[] bytes) @@ -134,34 +227,5 @@ public DatasetPass load_data(string? path = "imdb.npz", var y = np.Load_Npz(bytes); return (y["y_train.npy"], y["y_test.npy"]); } - - string Download() - { - var dst = Path.Combine(Path.GetTempPath(), dest_folder); - Directory.CreateDirectory(dst); - - Web.Download(origin_folder + file_name, dst, file_name); - - return dst; - // return Path.Combine(dst, file_name); - } - - protected IEnumerable PraseData(string[] x) - { - var data_list = new List(); - for (int i = 0; i < len(x); i++) - { - var list_string = x[i]; - var cleaned_list_string = list_string.Replace("[", "").Replace("]", "").Replace(" ", ""); - string[] number_strings = cleaned_list_string.Split(','); - int[] numbers = new int[number_strings.Length]; - for (int j = 0; j < number_strings.Length; j++) - { - numbers[j] = int.Parse(number_strings[j]); - } - data_list.Add(numbers); - } - return data_list; - } } } diff --git a/src/TensorFlowNET.Keras/Utils/data_utils.cs b/src/TensorFlowNET.Keras/Utils/data_utils.cs index 5b84c601f..16b121b07 100644 --- a/src/TensorFlowNET.Keras/Utils/data_utils.cs +++ b/src/TensorFlowNET.Keras/Utils/data_utils.cs @@ -39,5 +39,52 @@ public static string get_file(string fname, string origin, return datadir; } + + public static (NDArray, NDArray) _remove_long_seq(int maxlen, NDArray seq, NDArray label) + { + /*Removes sequences that exceed the maximum length. + + Args: + maxlen: Int, maximum length of the output sequences. 
+ seq: List of lists, where each sublist is a sequence. + label: List where each element is an integer. + + Returns: + new_seq, new_label: shortened lists for `seq` and `label`. + + */ + List new_seq = new List(); + List new_label = new List(); + + for (var i = 0; i < seq.shape[0]; i++) + { + if (maxlen < seq.shape[1] && seq[i][maxlen] != 0) + continue; + int[] sentence = new int[maxlen]; + for (var j = 0; j < maxlen && j < seq.shape[1]; j++) + { + sentence[j] = seq[i, j]; + } + new_seq.Add(sentence); + new_label.Add(label[i]); + } + + int[,] new_seq_array = new int[new_seq.Count, maxlen]; + int[] new_label_array = new int[new_label.Count]; + + for (var i = 0; i < new_seq.Count; i++) + { + for (var j = 0; j < maxlen; j++) + { + new_seq_array[i, j] = new_seq[i][j]; + } + } + + for (var i = 0; i < new_label.Count; i++) + { + new_label_array[i] = new_label[i]; + } + return (new_seq_array, new_label_array); + } } } diff --git a/test/TensorFlowNET.UnitTest/Dataset/DatasetTest.cs b/test/TensorFlowNET.UnitTest/Dataset/DatasetTest.cs index db6252efc..251eeff90 100644 --- a/test/TensorFlowNET.UnitTest/Dataset/DatasetTest.cs +++ b/test/TensorFlowNET.UnitTest/Dataset/DatasetTest.cs @@ -1,6 +1,8 @@ using Microsoft.VisualStudio.TestTools.UnitTesting; using System; +using System.Collections.Generic; using System.Linq; +using Tensorflow.NumPy; using static Tensorflow.Binding; using static Tensorflow.KerasApi; @@ -207,10 +209,28 @@ public void GetData() var y_train = dataset.Train.Item2; var x_val = dataset.Test.Item1; var y_val = dataset.Test.Item2; - print(len(x_train) + "Training sequences"); - print(len(x_val) + "Validation sequences"); - //x_train = keras.preprocessing.sequence.pad_sequences((IEnumerable)x_train, maxlen: maxlen); - //x_val = keras.preprocessing.sequence.pad_sequences((IEnumerable)x_val, maxlen: maxlen); + + x_train = keras.preprocessing.sequence.pad_sequences(RemoveZeros(x_train), maxlen: maxlen); + x_val = keras.preprocessing.sequence.pad_sequences(RemoveZeros(x_val), maxlen: maxlen); + print(len(x_train) + " Training sequences"); + print(len(x_val) + " Validation sequences"); + } + IEnumerable RemoveZeros(NDArray data) + { + List new_data = new List(); + for (var i = 0; i < data.shape[0]; i++) + { + List new_array = new List(); + for (var j = 0; j < data.shape[1]; j++) + { + if (data[i][j] == 0) + break; + else + new_array.Add((int)data[i][j]); + } + new_data.Add(new_array.ToArray()); + } + return new_data; } } } From f57a6fe6ed006f79511f4cc9550eeda312b11e98 Mon Sep 17 00:00:00 2001 From: lingbai-kong Date: Sat, 9 Sep 2023 18:31:46 +0800 Subject: [PATCH 11/77] optimize the time complexity of Imdb dataset loader --- src/TensorFlowNET.Keras/Datasets/Imdb.cs | 101 ++++++++++-------- src/TensorFlowNET.Keras/Utils/data_utils.cs | 16 +-- .../Dataset/DatasetTest.cs | 11 +- 3 files changed, 71 insertions(+), 57 deletions(-) diff --git a/src/TensorFlowNET.Keras/Datasets/Imdb.cs b/src/TensorFlowNET.Keras/Datasets/Imdb.cs index 0266b48bd..49fc79251 100644 --- a/src/TensorFlowNET.Keras/Datasets/Imdb.cs +++ b/src/TensorFlowNET.Keras/Datasets/Imdb.cs @@ -94,8 +94,6 @@ public DatasetPass load_data( var fileBytes = File.ReadAllBytes(path); var (x_train, x_test) = LoadX(fileBytes); var (labels_train, labels_test) = LoadY(fileBytes); - x_test.astype(np.int32); - labels_test.astype(np.int32); var indices = np.arange(len(x_train)); np.random.shuffle(indices, seed); @@ -107,67 +105,80 @@ public DatasetPass load_data( x_test = x_test[indices]; labels_test = labels_test[indices]; + var x_train_array = 
(int[,])x_train.ToMultiDimArray(); + var x_test_array = (int[,])x_test.ToMultiDimArray(); + var labels_train_array = (long[])labels_train.ToArray(); + var labels_test_array = (long[])labels_test.ToArray(); + if (start_char != null) { - int[,] new_x_train = new int[x_train.shape[0], x_train.shape[1] + 1]; - for (var i = 0; i < x_train.shape[0]; i++) + int[,] new_x_train_array = new int[x_train_array.GetLength(0), x_train_array.GetLength(1) + 1]; + for (var i = 0; i < x_train_array.GetLength(0); i++) { - new_x_train[i, 0] = (int)start_char; - for (var j = 0; j < x_train.shape[1]; j++) + new_x_train_array[i, 0] = (int)start_char; + for (var j = 0; j < x_train_array.GetLength(1); j++) { - new_x_train[i, j + 1] = x_train[i][j]; + if (x_train_array[i, j] == 0) + break; + new_x_train_array[i, j + 1] = x_train_array[i, j]; } } - int[,] new_x_test = new int[x_test.shape[0], x_test.shape[1] + 1]; - for (var i = 0; i < x_test.shape[0]; i++) + int[,] new_x_test_array = new int[x_test_array.GetLength(0), x_test_array.GetLength(1) + 1]; + for (var i = 0; i < x_test_array.GetLength(0); i++) { - new_x_test[i, 0] = (int)start_char; - for (var j = 0; j < x_test.shape[1]; j++) + new_x_test_array[i, 0] = (int)start_char; + for (var j = 0; j < x_test_array.GetLength(1); j++) { - new_x_test[i, j + 1] = x_test[i][j]; + if (x_test_array[i, j] == 0) + break; + new_x_test_array[i, j + 1] = x_test_array[i, j]; } } - x_train = new NDArray(new_x_train); - x_test = new NDArray(new_x_test); + x_train_array = new_x_train_array; + x_test_array = new_x_test_array; } else if (index_from != 0) { - for (var i = 0; i < x_train.shape[0]; i++) + for (var i = 0; i < x_train_array.GetLength(0); i++) { - for (var j = 0; j < x_train.shape[1]; j++) + for (var j = 0; j < x_train_array.GetLength(1); j++) { - if (x_train[i, j] != 0) - x_train[i, j] += index_from; + if (x_train_array[i, j] == 0) + break; + x_train_array[i, j] += index_from; } } - for (var i = 0; i < x_test.shape[0]; i++) + for (var i = 0; i < x_test_array.GetLength(0); i++) { - for (var j = 0; j < x_test.shape[1]; j++) + for (var j = 0; j < x_test_array.GetLength(1); j++) { - if (x_test[i, j] != 0) - x_test[i, j] += index_from; + if (x_test_array[i, j] == 0) + break; + x_test[i, j] += index_from; } } } - if (maxlen != null) + if (maxlen == null) { - (x_train, labels_train) = data_utils._remove_long_seq((int)maxlen, x_train, labels_train); - (x_test, labels_test) = data_utils._remove_long_seq((int)maxlen, x_test, labels_test); - if (x_train.size == 0 || x_test.size == 0) - throw new ValueError("After filtering for sequences shorter than maxlen=" + - $"{maxlen}, no sequence was kept. Increase maxlen."); + maxlen = max(x_train_array.GetLength(1), x_test_array.GetLength(1)); } + (x_train, labels_train) = data_utils._remove_long_seq((int)maxlen, x_train_array, labels_train_array); + (x_test, labels_test) = data_utils._remove_long_seq((int)maxlen, x_test_array, labels_test_array); + if (x_train.size == 0 || x_test.size == 0) + throw new ValueError("After filtering for sequences shorter than maxlen=" + + $"{maxlen}, no sequence was kept. 
Increase maxlen."); var xs = np.concatenate(new[] { x_train, x_test }); var labels = np.concatenate(new[] { labels_train, labels_test }); + var xs_array = (int[,])xs.ToMultiDimArray(); - if(num_words == null) + if (num_words == null) { num_words = 0; - for (var i = 0; i < xs.shape[0]; i++) - for (var j = 0; j < xs.shape[1]; j++) - num_words = max((int)num_words, (int)xs[i][j]); + for (var i = 0; i < xs_array.GetLength(0); i++) + for (var j = 0; j < xs_array.GetLength(1); j++) + num_words = max((int)num_words, (int)xs_array[i, j]); } // by convention, use 2 as OOV word @@ -175,32 +186,32 @@ public DatasetPass load_data( // 0 (padding), 1 (start), 2 (OOV) if (oov_char != null) { - int[,] new_xs = new int[xs.shape[0], xs.shape[1]]; - for(var i = 0; i < xs.shape[0]; i++) + int[,] new_xs_array = new int[xs_array.GetLength(0), xs_array.GetLength(1)]; + for (var i = 0; i < xs_array.GetLength(0); i++) { - for(var j = 0; j < xs.shape[1]; j++) + for (var j = 0; j < xs_array.GetLength(1); j++) { - if ((int)xs[i][j] == 0 || skip_top <= (int)xs[i][j] && (int)xs[i][j] < num_words) - new_xs[i, j] = (int)xs[i][j]; + if (xs_array[i, j] == 0 || skip_top <= xs_array[i, j] && xs_array[i, j] < num_words) + new_xs_array[i, j] = xs_array[i, j]; else - new_xs[i, j] = (int)oov_char; + new_xs_array[i, j] = (int)oov_char; } } - xs = new NDArray(new_xs); + xs = new NDArray(new_xs_array); } else { - int[,] new_xs = new int[xs.shape[0], xs.shape[1]]; - for (var i = 0; i < xs.shape[0]; i++) + int[,] new_xs_array = new int[xs_array.GetLength(0), xs_array.GetLength(1)]; + for (var i = 0; i < xs_array.GetLength(0); i++) { int k = 0; - for (var j = 0; j < xs.shape[1]; j++) + for (var j = 0; j < xs_array.GetLength(1); j++) { - if ((int)xs[i][j] == 0 || skip_top <= (int)xs[i][j] && (int)xs[i][j] < num_words) - new_xs[i, k++] = (int)xs[i][j]; + if (xs_array[i, j] == 0 || skip_top <= xs_array[i, j] && xs_array[i, j] < num_words) + new_xs_array[i, k++] = xs_array[i, j]; } } - xs = new NDArray(new_xs); + xs = new NDArray(new_xs_array); } var idx = len(x_train); diff --git a/src/TensorFlowNET.Keras/Utils/data_utils.cs b/src/TensorFlowNET.Keras/Utils/data_utils.cs index 16b121b07..57ae76695 100644 --- a/src/TensorFlowNET.Keras/Utils/data_utils.cs +++ b/src/TensorFlowNET.Keras/Utils/data_utils.cs @@ -54,23 +54,25 @@ public static (NDArray, NDArray) _remove_long_seq(int maxlen, NDArray seq, NDArr */ List new_seq = new List(); - List new_label = new List(); + List new_label = new List(); - for (var i = 0; i < seq.shape[0]; i++) + var seq_array = (int[,])seq.ToMultiDimArray(); + var label_array = (long[])label.ToArray(); + for (var i = 0; i < seq_array.GetLength(0); i++) { - if (maxlen < seq.shape[1] && seq[i][maxlen] != 0) + if (maxlen < seq_array.GetLength(1) && seq_array[i,maxlen] != 0) continue; int[] sentence = new int[maxlen]; - for (var j = 0; j < maxlen && j < seq.shape[1]; j++) + for (var j = 0; j < maxlen && j < seq_array.GetLength(1); j++) { - sentence[j] = seq[i, j]; + sentence[j] = seq_array[i, j]; } new_seq.Add(sentence); - new_label.Add(label[i]); + new_label.Add(label_array[i]); } int[,] new_seq_array = new int[new_seq.Count, maxlen]; - int[] new_label_array = new int[new_label.Count]; + long[] new_label_array = new long[new_label.Count]; for (var i = 0; i < new_seq.Count; i++) { diff --git a/test/TensorFlowNET.UnitTest/Dataset/DatasetTest.cs b/test/TensorFlowNET.UnitTest/Dataset/DatasetTest.cs index 251eeff90..183544ab6 100644 --- a/test/TensorFlowNET.UnitTest/Dataset/DatasetTest.cs +++ 
b/test/TensorFlowNET.UnitTest/Dataset/DatasetTest.cs @@ -204,7 +204,7 @@ public void GetData() { var vocab_size = 20000; // Only consider the top 20k words var maxlen = 200; // Only consider the first 200 words of each movie review - var dataset = keras.datasets.imdb.load_data(num_words: vocab_size); + var dataset = keras.datasets.imdb.load_data(num_words: vocab_size, maxlen: maxlen); var x_train = dataset.Train.Item1; var y_train = dataset.Train.Item2; var x_val = dataset.Test.Item1; @@ -217,16 +217,17 @@ public void GetData() } IEnumerable RemoveZeros(NDArray data) { + var data_array = (int[,])data.ToMultiDimArray(); List new_data = new List(); - for (var i = 0; i < data.shape[0]; i++) + for (var i = 0; i < data_array.GetLength(0); i++) { List new_array = new List(); - for (var j = 0; j < data.shape[1]; j++) + for (var j = 0; j < data_array.GetLength(1); j++) { - if (data[i][j] == 0) + if (data_array[i, j] == 0) break; else - new_array.Add((int)data[i][j]); + new_array.Add(data_array[i, j]); } new_data.Add(new_array.ToArray()); } From 114282885589956a29d7bcd015f55e966cb12532 Mon Sep 17 00:00:00 2001 From: Asaf Agami Date: Sun, 10 Sep 2023 18:09:38 +0300 Subject: [PATCH 12/77] fix: model does not stop on stop_training == true --- src/TensorFlowNET.Keras/Engine/Model.Fit.cs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/TensorFlowNET.Keras/Engine/Model.Fit.cs b/src/TensorFlowNET.Keras/Engine/Model.Fit.cs index de57f19ae..d6f89d8be 100644 --- a/src/TensorFlowNET.Keras/Engine/Model.Fit.cs +++ b/src/TensorFlowNET.Keras/Engine/Model.Fit.cs @@ -224,6 +224,10 @@ History FitInternal(DataHandler data_handler, int epochs, int validation_step, i GC.Collect(); GC.WaitForPendingFinalizers(); + if (stop_training) + { + break; + } } return callbacks.History; @@ -283,6 +287,10 @@ History FitInternal(DataHandler data_handler, int epochs, int verbose, List Date: Wed, 13 Sep 2023 17:18:43 +0000 Subject: [PATCH 13/77] cached_session for graph tests --- .../ControlFlowTest/WhileContextTestCase.cs | 3 +- .../GradientTest/GradientTest.cs | 21 ++- .../PythonTest.cs | 148 +++++++++++++++++- 3 files changed, 156 insertions(+), 16 deletions(-) diff --git a/test/TensorFlowNET.Graph.UnitTest/ControlFlowTest/WhileContextTestCase.cs b/test/TensorFlowNET.Graph.UnitTest/ControlFlowTest/WhileContextTestCase.cs index c637cf858..4dee61337 100644 --- a/test/TensorFlowNET.Graph.UnitTest/ControlFlowTest/WhileContextTestCase.cs +++ b/test/TensorFlowNET.Graph.UnitTest/ControlFlowTest/WhileContextTestCase.cs @@ -1,5 +1,6 @@ using Microsoft.VisualStudio.TestTools.UnitTesting; using System; +using System.Linq; using Tensorflow; using static Tensorflow.Binding; @@ -29,7 +30,7 @@ private void _testWhileContextHelper(int maximum_iterations) var b = new Func(x => math_ops.add(x, 1, name: "c")); //control_flow_ops.while_loop( // c, b, i , maximum_iterations: tf.constant(maximum_iterations)); - foreach (Operation op in sess.graph.get_operations()) + foreach (Operation op in sess.Single().graph.get_operations()) { var control_flow_context = op._get_control_flow_context(); /*if (control_flow_context != null) diff --git a/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs b/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs index f240817b4..37bc646dd 100644 --- a/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs +++ b/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs @@ -388,22 +388,19 @@ public void testBoundaryStop() } - [Ignore("TODO")] [TestMethod] public void 
testBoundaryContinue() { - //@test_util.run_v1_only("b/120545219") - //def testBoundaryContinue(self): - // # Test that we differentiate both 'x' and 'y' correctly when x is a - // # predecessor of y. - // with self.cached_session(): - // x = constant(1.0) - // y = x * 2.0 - // z = y * 3.0 - // grads = gradients.gradients(z, [x, y]) - // self.assertTrue(all(x is not None for x in grads)) - // self.assertEqual(6.0, grads[0].eval()) + // Test that we differentiate both 'x' and 'y' correctly when x is a + // predecessor of y. + self.cached_session(); + var x = tf.constant(1.0); + var y = x * 2.0; + var z = y * 3.0; + var grads = tf.gradients(z, new[] { x, y }); + self.assertTrue(all(grads.Select(x => x != null))); + self.assertEqual(6.0, grads[0].eval()); } [Ignore("TODO")] diff --git a/test/TensorFlowNET.Graph.UnitTest/PythonTest.cs b/test/TensorFlowNET.Graph.UnitTest/PythonTest.cs index 513791933..90abc0cc9 100644 --- a/test/TensorFlowNET.Graph.UnitTest/PythonTest.cs +++ b/test/TensorFlowNET.Graph.UnitTest/PythonTest.cs @@ -6,6 +6,8 @@ using System.Linq; using Tensorflow; using static Tensorflow.Binding; +using OneOf.Types; +using System.Collections.Generic; namespace TensorFlowNET.UnitTest { @@ -139,6 +141,21 @@ public void assertProtoEquals(object toProto, object o) #region tensor evaluation and test session + private Session _cached_session = null; + private Graph _cached_graph = null; + private object _cached_config = null; + private bool _cached_force_gpu = false; + + private void _ClearCachedSession() + { + if (self._cached_session != null) + { + self._cached_session.Dispose(); + self._cached_session = null; + } + } + + //protected object _eval_helper(Tensor[] tensors) //{ // if (tensors == null) @@ -203,10 +220,57 @@ public T evaluate(Tensor tensor) } } - - public Session cached_session() + ///Returns a TensorFlow Session for use in executing tests. + public IEnumerable cached_session( + Graph graph = null, object config = null, bool use_gpu = false, bool force_gpu = false) { - throw new NotImplementedException(); + // This method behaves differently than self.session(): for performance reasons + // `cached_session` will by default reuse the same session within the same + // test.The session returned by this function will only be closed at the end + // of the test(in the TearDown function). + + // Use the `use_gpu` and `force_gpu` options to control where ops are run.If + // `force_gpu` is True, all ops are pinned to `/ device:GPU:0`. Otherwise, if + // `use_gpu` is True, TensorFlow tries to run as many ops on the GPU as + // possible.If both `force_gpu and `use_gpu` are False, all ops are pinned to + // the CPU. + + // Example: + // python + // class MyOperatorTest(test_util.TensorFlowTestCase) : + // def testMyOperator(self): + // with self.cached_session() as sess: + // valid_input = [1.0, 2.0, 3.0, 4.0, 5.0] + // result = MyOperator(valid_input).eval() + // self.assertEqual(result, [1.0, 2.0, 3.0, 5.0, 8.0] + // invalid_input = [-1.0, 2.0, 7.0] + // with self.assertRaisesOpError("negative input not supported"): + // MyOperator(invalid_input).eval() + + + // Args: + // graph: Optional graph to use during the returned session. + // config: An optional config_pb2.ConfigProto to use to configure the + // session. + // use_gpu: If True, attempt to run as many ops as possible on GPU. + // force_gpu: If True, pin all ops to `/device:GPU:0`. + + // Yields: + // A Session object that should be used as a context manager to surround + // the graph building and execution code in a test case. 
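// A C# sketch of the same pattern (hedged: this mirrors how the graph tests
// in this series consume the session once later patches let it be disposed
// directly with `using`):
//
//     using (var sess = self.cached_session())
//     {
//         var x = tf.constant(1.0);
//         var result = sess.run(x);  // evaluate a tensor in the cached session
//     }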
+ + + // TODO: + // if context.executing_eagerly(): + // return self._eval_helper(tensors) + // else: + { + var sess = self._get_cached_session( + graph, config, force_gpu, crash_if_inconsistent_args: true); + var cached = self._constrain_devices_and_set_default(sess, use_gpu, force_gpu); + return cached; + + } } //Returns a TensorFlow Session for use in executing tests. @@ -254,6 +318,40 @@ public Session session(Graph graph = null, object config = null, bool use_gpu = return s.as_default(); } + private IEnumerable _constrain_devices_and_set_default(Session sess, bool use_gpu, bool force_gpu) + { + // Set the session and its graph to global default and constrain devices.""" + // if context.executing_eagerly(): + // yield None + // else: + { + sess.graph.as_default(); + sess.as_default(); + { + if (force_gpu) + { + // TODO: + + // Use the name of an actual device if one is detected, or + // '/device:GPU:0' otherwise + /* var gpu_name = gpu_device_name(); + if (!gpu_name) + gpu_name = "/device:GPU:0" + using (sess.graph.device(gpu_name)) { + yield return sess; + }*/ + yield return sess; + } + else if (use_gpu) + yield return sess; + else + using (sess.graph.device("/device:CPU:0")) + yield return sess; + } + + } + } + // See session() for details. private Session _create_session(Graph graph, object cfg, bool forceGpu) { @@ -298,6 +396,50 @@ private Session _create_session(Graph graph, object cfg, bool forceGpu) return new Session(graph);//, config = prepare_config(config)) } + private Session _get_cached_session( + Graph graph = null, + object config = null, + bool force_gpu = false, + bool crash_if_inconsistent_args = true) + { + // See cached_session() for documentation. + if (self._cached_session == null) + { + var sess = self._create_session(graph, config, force_gpu); + self._cached_session = sess; + self._cached_graph = graph; + self._cached_config = config; + self._cached_force_gpu = force_gpu; + return sess; + } else { + + if (crash_if_inconsistent_args && !self._cached_graph.Equals(graph)) + throw new ValueError(@"The graph used to get the cached session is + different than the one that was used to create the + session. Maybe create a new session with + self.session()"); + if (crash_if_inconsistent_args && !self._cached_config.Equals(config)) { + throw new ValueError(@"The config used to get the cached session is + different than the one that was used to create the + session. Maybe create a new session with + self.session()"); + } + if (crash_if_inconsistent_args && !self._cached_force_gpu.Equals(force_gpu)) { + throw new ValueError(@"The force_gpu value used to get the cached session is + different than the one that was used to create the + session. 
Maybe create a new session with
+ self.session()");
+ }
+ return _cached_session;
+ }
+ }
+
+ [TestCleanup]
+ public void Cleanup()
+ {
+ _ClearCachedSession();
+ }
+
 #endregion

 public void AssetSequenceEqual(T[] a, T[] b)

From ae50fa93bac27f9c7c77b7a38289f20d78480b3a Mon Sep 17 00:00:00 2001
From: Alexander Novikov
Date: Thu, 14 Sep 2023 03:58:15 +0000
Subject: [PATCH 14/77] fix flaky test boundary continue

---
 test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs b/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs
index 37bc646dd..0b4d79bb7 100644
--- a/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs
+++ b/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs
@@ -394,7 +394,7 @@ public void testBoundaryContinue()

 // Test that we differentiate both 'x' and 'y' correctly when x is a
 // predecessor of y.
- self.cached_session();
+ var sess = self.cached_session().Single();
 var x = tf.constant(1.0);
 var y = x * 2.0;
 var z = y * 3.0;

From 9d71cad96ecb69cd83c2b113fc808b608fbd7875 Mon Sep 17 00:00:00 2001
From: Alexander Novikov
Date: Thu, 14 Sep 2023 11:21:18 +0000
Subject: [PATCH 15/77] using and no IEnumerable

---
 .../ControlFlowTest/WhileContextTestCase.cs | 4 ++--
 .../GradientTest/GradientTest.cs | 16 ++++++++------
 .../PythonTest.cs | 22 +++++++++----------
 3 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/test/TensorFlowNET.Graph.UnitTest/ControlFlowTest/WhileContextTestCase.cs b/test/TensorFlowNET.Graph.UnitTest/ControlFlowTest/WhileContextTestCase.cs
index 4dee61337..e93324f3e 100644
--- a/test/TensorFlowNET.Graph.UnitTest/ControlFlowTest/WhileContextTestCase.cs
+++ b/test/TensorFlowNET.Graph.UnitTest/ControlFlowTest/WhileContextTestCase.cs
@@ -24,13 +24,13 @@ public void SimpleWhileLoop()
 private void _testWhileContextHelper(int maximum_iterations)
 {
 // TODO: implement missing code dependencies
- var sess = this.cached_session();
+ using var sess = this.cached_session();
 var i = constant_op.constant(0, name: "i");
 var c = new Func(x => gen_math_ops.less(x, ops.convert_to_tensor(10), name: "c"));
 var b = new Func(x => math_ops.add(x, 1, name: "c"));
 //control_flow_ops.while_loop(
 // c, b, i , maximum_iterations: tf.constant(maximum_iterations));
- foreach (Operation op in sess.Single().graph.get_operations())
+ foreach (Operation op in sess.graph.get_operations())
 {
 var control_flow_context = op._get_control_flow_context();
 /*if (control_flow_context != null)
diff --git a/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs b/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs
index 0b4d79bb7..099c11627 100644
--- a/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs
+++ b/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs
@@ -394,13 +394,15 @@ public void testBoundaryContinue()

 // Test that we differentiate both 'x' and 'y' correctly when x is a
 // predecessor of y.
- var sess = self.cached_session().Single(); - var x = tf.constant(1.0); - var y = x * 2.0; - var z = y * 3.0; - var grads = tf.gradients(z, new[] { x, y }); - self.assertTrue(all(grads.Select(x => x != null))); - self.assertEqual(6.0, grads[0].eval()); + using (self.cached_session()) + { + var x = tf.constant(1.0); + var y = x * 2.0; + var z = y * 3.0; + var grads = tf.gradients(z, new[] { x, y }); + self.assertTrue(all(grads.Select(x => x != null))); + self.assertEqual(6.0, grads[0].eval()); + } } [Ignore("TODO")] diff --git a/test/TensorFlowNET.Graph.UnitTest/PythonTest.cs b/test/TensorFlowNET.Graph.UnitTest/PythonTest.cs index 90abc0cc9..ccf59f5ae 100644 --- a/test/TensorFlowNET.Graph.UnitTest/PythonTest.cs +++ b/test/TensorFlowNET.Graph.UnitTest/PythonTest.cs @@ -221,7 +221,7 @@ public T evaluate(Tensor tensor) } ///Returns a TensorFlow Session for use in executing tests. - public IEnumerable cached_session( + public Session cached_session( Graph graph = null, object config = null, bool use_gpu = false, bool force_gpu = false) { // This method behaves differently than self.session(): for performance reasons @@ -267,9 +267,8 @@ public IEnumerable cached_session( { var sess = self._get_cached_session( graph, config, force_gpu, crash_if_inconsistent_args: true); - var cached = self._constrain_devices_and_set_default(sess, use_gpu, force_gpu); - return cached; - + using var cached = self._constrain_devices_and_set_default(sess, use_gpu, force_gpu); + return cached; } } @@ -318,13 +317,12 @@ public Session session(Graph graph = null, object config = null, bool use_gpu = return s.as_default(); } - private IEnumerable _constrain_devices_and_set_default(Session sess, bool use_gpu, bool force_gpu) + private Session _constrain_devices_and_set_default(Session sess, bool use_gpu, bool force_gpu) { // Set the session and its graph to global default and constrain devices.""" - // if context.executing_eagerly(): - // yield None - // else: - { + if (tf.executing_eagerly()) + return null; + else { sess.graph.as_default(); sess.as_default(); { @@ -340,13 +338,13 @@ private IEnumerable _constrain_devices_and_set_default(Session sess, bo using (sess.graph.device(gpu_name)) { yield return sess; }*/ - yield return sess; + return sess; } else if (use_gpu) - yield return sess; + return sess; else using (sess.graph.device("/device:CPU:0")) - yield return sess; + return sess; } } From adef5bcdc518d879ca385d37fe17ce5b2a329c44 Mon Sep 17 00:00:00 2001 From: Alexander Novikov Date: Thu, 14 Sep 2023 15:37:16 +0000 Subject: [PATCH 16/77] gradient tests --- .../GradientTest/GradientTest.cs | 383 +++++++++++------- 1 file changed, 236 insertions(+), 147 deletions(-) diff --git a/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs b/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs index 099c11627..b0827f2ab 100644 --- a/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs +++ b/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs @@ -5,6 +5,7 @@ using System.Linq; using Tensorflow; using static Tensorflow.Binding; +using Tensorflow.Framework; namespace TensorFlowNET.UnitTest.Gradient { @@ -394,6 +395,8 @@ public void testBoundaryContinue() // Test that we differentiate both 'x' and 'y' correctly when x is a // predecessor of y. 
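// With y = x * 2 and z = y * 3, the chain rule gives dz/dx = 6.0, which is
// the value asserted on grads[0] below.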
+ //TODO: @test_util.run_v1_only("b/120545219") + using (self.cached_session()) { var x = tf.constant(1.0); @@ -402,66 +405,61 @@ public void testBoundaryContinue() var grads = tf.gradients(z, new[] { x, y }); self.assertTrue(all(grads.Select(x => x != null))); self.assertEqual(6.0, grads[0].eval()); - } + } } - [Ignore("TODO")] [TestMethod] public void testAggregationMethodAccumulateN() { + //TODO: @test_util.run_v1_only("b/120545219") - //@test_util.run_v1_only("b/120545219") - //def testAggregationMethodAccumulateN(self): - // with self.cached_session(): - // x = constant(1.0) - // y = x * 2.0 - // z = y + y + y + y + y + y + y + y + y + y - // grads = gradients.gradients( - // z, [x, y], - // aggregation_method=gradients.AggregationMethod. - // EXPERIMENTAL_ACCUMULATE_N) - // self.assertTrue(all(x is not None for x in grads)) - // self.assertEqual(20.0, grads[0].eval()) - // self.assertEqual(10.0, grads[1].eval()) - + using (self.cached_session()) + { + var x = tf.constant(1.0); + var y = x * 2.0; + var z = y + y + y + y + y + y + y + y + y + y; + var grads = tf.gradients(z, new[] { x, y }, + aggregation_method: AggregationMethod.EXPERIMENTAL_ACCUMULATE_N); + self.assertTrue(all(grads.Select(x => x != null))); + self.assertEqual(20.0, grads[0].eval()); + self.assertEqual(10.0, grads[1].eval()); + } } - [Ignore("TODO")] [TestMethod] public void testAggregationMethodAddN() { - //@test_util.run_v1_only("b/120545219") - //def testAggregationMethodAddN(self): - // with self.cached_session(): - // x = constant(1.0) - // y = x * 2.0 - // z = y + y + y + y + y + y + y + y + y + y - // grads = gradients.gradients( - // z, [x, y], aggregation_method=gradients.AggregationMethod.ADD_N) - // self.assertTrue(all(x is not None for x in grads)) - // self.assertEqual(20.0, grads[0].eval()) - // self.assertEqual(10.0, grads[1].eval()) - + //TODO: @test_util.run_v1_only("b/120545219") + using (self.cached_session()) + { + var x = tf.constant(1.0); + var y = x * 2.0; + var z = y + y + y + y + y + y + y + y + y + y; + var grads = tf.gradients(z, new[] { x, y }, + aggregation_method: AggregationMethod.ADD_N); + self.assertTrue(grads.All(x => x != null)); + self.assertEqual(20.0, grads[0].eval()); + self.assertEqual(10.0, grads[1].eval()); + } } - [Ignore("TODO")] [TestMethod] public void testAggregationMethodTree() { - //@test_util.run_v1_only("b/120545219") - //def testAggregationMethodTree(self): - // with self.cached_session(): - // x = constant(1.0) - // y = x * 2.0 - // z = y + y + y + y + y + y + y + y + y + y - // grads = gradients.gradients( - // z, [x, y], - // aggregation_method=gradients.AggregationMethod.EXPERIMENTAL_TREE) - // self.assertTrue(all(x is not None for x in grads)) - // self.assertEqual(20.0, grads[0].eval()) - // self.assertEqual(10.0, grads[1].eval()) + //TODO: @test_util.run_v1_only("b/120545219") + using (self.cached_session()) + { + var x = tf.constant(1.0); + var y = x * 2.0; + var z = y + y + y + y + y + y + y + y + y + y; + var grads = tf.gradients(z, new[] { x, y }, + aggregation_method: AggregationMethod.EXPERIMENTAL_TREE); + self.assertTrue(grads.All(x => x != null)); + self.assertEqual(20.0, grads[0].eval()); + self.assertEqual(10.0, grads[1].eval()); + } } [Ignore("TODO")] @@ -490,24 +488,32 @@ public void testNoGradientForStringOutputs() // self.assertTrue(isinstance(grads[0], ops.Tensor)) } - [Ignore("TODO")] [TestMethod] public void testSingletonIndexedSlices() { + tf.Graph().as_default(); + + var x = tf.placeholder(TF_DataType.TF_FLOAT); + var y = tf.identity(x); + 
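// An IndexedSlices bundles a values tensor with the row indices those values
// occupy; feeding it via grad_ys below exercises the sparse-gradient path.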
var dy_indices = tf.placeholder(TF_DataType.TF_INT32); + var dy_values = tf.placeholder(TF_DataType.TF_FLOAT); + Tensor dy = new IndexedSlices(dy_values, dy_indices); + var dx = tf.gradients(new[] { y }, new[] { x }, grad_ys: new[] { dy })[0]; + // The IndexedSlices gradient of tf.identity is the identity map. + using (var sess = self.cached_session()) + { + var feed_dict = new FeedItem[] + { + ( x, new Tensor(new float[] { 1.0f }) ), + (dy_indices, new Tensor(new int[] { 0 })), + (dy_values, new Tensor(new float[] { 2.0f })) + }; + var result = sess.run(new[] { dx, dy }, feed_dict); + var vdx = result[0]; + var vdy = result[1]; + self.assertEqual(vdx, vdy); + } - //def testSingletonIndexedSlices(self): - // with ops.Graph().as_default(): - // x = array_ops.placeholder(dtypes.float32) - // y = array_ops.identity(x) - // dy = ops.IndexedSlices( - // array_ops.placeholder(dtypes.float32), - // array_ops.placeholder(dtypes.int32)) - // dx, = gradients.gradients(y, x, grad_ys=dy) - // # The IndexedSlices gradient of tf.identity is the identity map. - // with self.cached_session() as sess: - // vdx, vdy = sess.run( - // [dx, dy], feed_dict={x: [1.0], dy.indices: [0], dy.values: [2.0]}) - // self.assertEqual(vdx, vdy) } [Ignore("TODO")] @@ -575,26 +581,25 @@ public void testVariableRefGradient() // self.assertIsNotNone(gradient) } - [Ignore("TODO")] [TestMethod] public void testDependentYs() { - //@test_util.run_v1_only("b/120545219") - //def testDependentYs(self): - // with self.cached_session(): - // x = constant_op.constant(3.0) - // y = math_ops.square(x) - // y1 = math_ops.square(y) - // y2 = math_ops.square(y1) - // g = gradients.gradients([y, y2], x) - // self.assertAllClose(17502.0, g[0].eval()) - // g = gradients.gradients(y + y2, x) - // self.assertAllClose(17502.0, g[0].eval()) - // z = array_ops.identity(y) - // z2 = array_ops.identity(y2) - // g = gradients.gradients([z, z2], x) - // self.assertAllClose(17502.0, g[0].eval()) - + //TODO: @test_util.run_v1_only("b/120545219") + using (self.cached_session()) + { + var x = constant_op.constant(3.0); + var y = math_ops.square(x); + var y1 = math_ops.square(y); + var y2 = math_ops.square(y1); + var g = tf.gradients(new[] { y, y2 }, new[] { x }); + self.assertAllClose(17502.0, g[0].eval()); + g = tf.gradients(y + y2, x); + self.assertAllClose(17502.0, g[0].eval()); + var z = array_ops.identity(y); + var z2 = array_ops.identity(y2); + g = tf.gradients(new[] { z, z2 }, new[] { x }); + self.assertAllClose(17502.0, g[0].eval()); + } } [Ignore("TODO")] @@ -602,75 +607,152 @@ public void testDependentYs() public void testPartialDerivatives() { - //@test_util.run_v1_only("b/120545219") - //def testPartialDerivatives(self): - // with self.cached_session(): - // x = constant_op.constant(1.) 
-        //      y = 2 * x
-        //      z = x + y
-        //      totalg = gradients.gradients(z, [x, y])
-        //      self.assertEqual([3.0, 1.0], [g.eval() for g in totalg])
-        //      partialg = gradients.gradients(z, [x, y], stop_gradients=[x, y])
-        //      self.assertEqual([1.0, 1.0], [g.eval() for g in partialg])
+            //TODO: @test_util.run_v1_only("b/120545219")
+            using (self.cached_session())
+            {
+                var x = tf.constant(1.0);
+                var y = 2 * x;
+                var z = x + y;
+                var totalg = tf.gradients(z, new[] { x, y });
+                self.assertEqual(new[] { 3.0, 1.0 }, totalg.Select(g => g.eval()));
+                var partialg = tf.gradients(z, new[] { x, y }, stop_gradients: new[] { x, y });
+                self.assertEqual(new[] { 1.0, 1.0 }, partialg.Select(g => g.eval()));
+            }
         }

-        [Ignore("TODO")]
+        // TODO: remove when np.testing.assert_allclose(a, b) is implemented
+        private class CollectionComparer : System.Collections.IComparer
+        {
+            private readonly double _epsilon = 1e-07;
+
+            public int Compare(object x, object y)
+            {
+                var a = (double)x;
+                var b = (double)y;
+
+                double delta = Math.Abs(a - b);
+                if (delta < _epsilon)
+                {
+                    return 0;
+                }
+                return a.CompareTo(b);
+            }
+        }
+
+        private struct Case
+        {
+            public Tensor[] grad1;
+            public Tensor[] grad2;
+            public string constants;
+            public string variables;
+        }
+
+        [Ignore("FIXME")]
         [TestMethod]
         public void testStopGradients()
         {
+
+            //TODO: @test_util.run_v1_only("b/120545219")
+            Dictionary<char, Tensor> makeGraph(RandomizedImpl rng, string stop_gradients)
+            {
+                Tensor functionOf(Tensor[] xs, int k)
+                {
+                    var shape = new Shape(k, k);
+                    // TODO: replace by DefaultIfEmpty() before Aggregate().
+                    if (!xs.Any())
+                    {
+                        return rng.random(shape).astype(np.float32);
+                    }
+                    return xs.Select(x => gen_math_ops.mat_mul(rng.random(shape).astype(np.float32), x))
+                        .Aggregate((t1, t2) => t1 + t2)
+                        + rng.random(shape).astype(np.float32);
+                }
+                var a = functionOf(Array.Empty<Tensor>(), 3);
+                if (stop_gradients.Contains('a')) a = array_ops.stop_gradient(a);
+                var b = functionOf(new Tensor[] { a }, 3);
+                if (stop_gradients.Contains('b')) b = array_ops.stop_gradient(b);
+                var c = functionOf(new Tensor[] { a, b }, 3);
+                if (stop_gradients.Contains('c')) c = array_ops.stop_gradient(c);
+                var d = functionOf(new Tensor[] { b, c }, 3);
+                if (stop_gradients.Contains('d')) d = array_ops.stop_gradient(d);

-            //@test_util.run_v1_only("b/120545219")
-            //def testStopGradients(self):
-            //    def _MakeGraph(rng, stop_gradients=()):
-            //        def _FunctionOf(xs, k=3):
-            //            return ops.convert_to_tensor(
-            //                sum(math_ops.matmul(rng.rand(k, k), x) for x in xs)
-            //                + rng.rand(k, k))

-            //    a = _FunctionOf([])
-            //    if "a" in stop_gradients: a = array_ops.stop_gradient(a)
-            //    b = _FunctionOf([a])
-            //    if "b" in stop_gradients: b = array_ops.stop_gradient(b)
-            //    c = _FunctionOf([a, b])
-            //    if "c" in stop_gradients: c = array_ops.stop_gradient(c)
-            //    d = _FunctionOf([b, c])
-            //    if "d" in stop_gradients: d = array_ops.stop_gradient(d)
-            //    return dict(a=a, b=b, c=c, d=d)

-            //    def _Gradients(ys, xs, **kwargs):
-            //        dydxs = gradients.gradients(ys, xs, **kwargs)
-            //        dydxs = [0.
* x if dydx is None else dydx
-            //                 for x, dydx in zip(xs, dydxs)]
-            //        return dydxs

-            //    seed = np.random.randint(1000)
-            //    cases = []
-            //    subsets = [""] + "a b c d ab ac ad bc bd cd abc abd acd bcd abcd".split()
-            //    graph = _MakeGraph(np.random.RandomState(seed))
-            //    for constants in subsets:
-            //        graph_with_stops = _MakeGraph(np.random.RandomState(seed), constants)
-            //        for variables_ in subsets:
-            //            # compute the gradient when stopped using tf.stop_gradients
-            //            grad1 = _Gradients([graph_with_stops["d"]],
-            //                               [graph_with_stops[v] for v in variables_])
-            //            # compute the gradient when stopped using the stop_gradients kwarg
-            //            grad2 = _Gradients([graph["d"]],
-            //                               [graph[v] for v in variables_],
-            //                               stop_gradients=[graph[v] for v in constants])
-            //            cases.append(dict(grad1=grad1, grad2=grad2,
-            //                              constants=constants, variables=variables_))

-            //    # evaluate all tensors in one call to session.run for speed
-            //    with self.cached_session() as sess:
-            //        results = sess.run([(case["grad1"], case["grad2"]) for case in cases])

-            //    for (npgrad1, npgrad2), case in zip(results, cases):
-            //        for a, b in zip(npgrad1, npgrad2):
-            //            np.testing.assert_allclose(a, b)
+                return new Dictionary<char, Tensor>
+                {
+                    { 'a', a },
+                    { 'b', b },
+                    { 'c', c },
+                    { 'd', d }
+                };
+            }
+
+            Tensor[] gradients(Tensor[] ys, Tensor[] xs, Tensor[] stop_gradients = null)
+            {
+                var dydxs = tf.gradients(ys, xs, stop_gradients: stop_gradients);
+                dydxs = dydxs.Select((dydx, i) => dydx == null ? xs[i] * 0 : dydx).ToArray();
+                return dydxs;
+            }
+
+            var seed = np.random.randint(1000);
+            // TODO: remove next line when np.random.RandomState implemented.
+            tf.set_random_seed(seed);
+            var cases = new List<Case>();
+            // TODO: add "" case.
+            var subsets = new List<string> { "" }.Concat("a b c d ab ac ad bc bd cd abc abd acd bcd abcd".Split());
+            // TODO: pass np.random.RandomState(seed) instead of np.random
+            var graph = makeGraph(np.random, string.Empty);
+            foreach (var constants in subsets)
+            {
+                var graphWithStops = makeGraph(np.random, constants);
+                foreach (var variables_ in subsets)
+                {
+                    // compute the gradient when stopped using tf.stop_gradients
+                    var grad1 = gradients(
+                        new[] { graphWithStops['d'] },
+                        variables_.ToCharArray().Select(v => graphWithStops[v]).ToArray()
+                    );
+                    // compute the gradient when stopped using the stop_gradients from args
+                    var grad2 = gradients(
+                        new[] { graph['d'] },
+                        variables_.ToCharArray().Select(v => graph[v]).ToArray(),
+                        constants.ToCharArray().Select(c => graph[c]).DefaultIfEmpty(null)?.ToArray()
+                    );
+                    cases.Add(new Case
+                    {
+                        grad1 = grad1,
+                        grad2 = grad2,
+                        variables = variables_,
+                        constants = constants,
+                    });
+                }
+            }
+            // evaluate all tensors in one call to session.run for speed
+            using (var sess = self.cached_session())
+            {
+                var results = sess.run(
+                    cases.Select(case_ => (
+                        case_.grad1,
+                        case_.grad2
+                    )).ToArray()
+                );
+
+                foreach (var (result, case_) in results.Zip(cases))
+                {
+                    var npgrad1 = result[0];
+                    var npgrad2 = result[1];
+                    foreach (var (a, b) in npgrad1.Zip(npgrad2))
+                    {
+                        // TODO: np.testing.assert_allclose(a, b);
+                        CollectionAssert.AreEqual(a.ToArray(), b.ToArray(), new CollectionComparer());
+                    }
+                }
+            }
         }

-        [Ignore("TODO")]
+
+
+        [Ignore("TODO: Unconnected gradients are not implemented")]
         [TestMethod]
         public void testUnconnectedGradientsNoneUnconnectedGradients()
         {
@@ -685,7 +767,7 @@ public void testUnconnectedGradientsNoneUnconnectedGradients()
             //    self.assertIsNone(grad[0])
         }

-        [Ignore("TODO")]
+        [Ignore("TODO: Unconnected gradients are not implemented")]
         [TestMethod]
         public void 
testUnconnectedGradientsZerosUnconnectedGradients() { @@ -699,15 +781,21 @@ public void testUnconnectedGradientsZerosUnconnectedGradients() // [y], [x], unconnected_gradients="zero") // with self.cached_session() as sess: // self.assertAllEqual([[0.0, 0.0], [0.0, 0.0]], self.evaluate(grads)[0]) + + // tf.Graph().as_default(); + // var x = tf.constant(1.0, shape: new long[] { 2, 2 }); + // var y = tf.constant(3.0, shape: new long[] { 3, 1 }); + // var grads = tf.gradients(new[] { y }, new[] { x }, unconnected_gradients: "zero"); + // using (self.cached_session()) + // { + // self.assertAllEqual(new[,] { { 0.0, 0.0 }, { 0.0, 0.0 } }, self.evaluate(grads)[0]); + // } } - [Ignore("TODO")] + [Ignore("TODO: Unconnected gradients are not implemented")] [TestMethod] public void testUnconnectedGradientsZeroConnectedGradients() { - - - //def testUnconnectedGradientsZeroConnectedGradients(self): // with ops.Graph().as_default(): // x = constant(1.0) @@ -716,9 +804,19 @@ public void testUnconnectedGradientsZeroConnectedGradients() // [y], [x], unconnected_gradients="zero") // with self.cached_session() as sess: // self.assertEquals(3.0, self.evaluate(grad)[0]) + + // tf.Graph().as_default(); + + // var x = tf.constant(1.0f); + // var y = x * 3.0f; + // var grad = tf.gradients(new [] { y }, new [] { x }, unconnected_gradients: "zero"); + // using (var sess = tf.Session()) + // { + // self.assertEquals(3.0, self.evaluate(grad)[0]); + // } } - [Ignore("TODO")] + [Ignore("TODO: Unconnected gradients are not implemented")] [TestMethod] public void testUnknownUnconnectedGradientsValueGiven() { @@ -729,15 +827,6 @@ public void testUnknownUnconnectedGradientsValueGiven() // with self.assertRaisesRegexp( // ValueError, "Unknown value for unconnected_gradients: 'nonsense'"): // gradients.gradients([y], [x], unconnected_gradients="nonsense") - } - - - - /* - - - - */ } } From a9dad3ce1114aa0b140472782d2ea4e36331107d Mon Sep 17 00:00:00 2001 From: Alexander Novikov Date: Thu, 14 Sep 2023 15:47:39 +0000 Subject: [PATCH 17/77] fixme labels --- test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs b/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs index b0827f2ab..3ce6661cc 100644 --- a/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs +++ b/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs @@ -488,6 +488,7 @@ public void testNoGradientForStringOutputs() // self.assertTrue(isinstance(grads[0], ops.Tensor)) } + [Ignore("FIXME")] [TestMethod] public void testSingletonIndexedSlices() { From 628b2ce7366329f03390c4fffb9a8c779bb75663 Mon Sep 17 00:00:00 2001 From: lingbai-kong Date: Fri, 15 Sep 2023 20:36:52 +0800 Subject: [PATCH 18/77] optimize temporal complexity of Imdb dataset loader --- src/TensorFlowNET.Keras/Datasets/Imdb.cs | 48 +++++++++------------ src/TensorFlowNET.Keras/Utils/data_utils.cs | 14 +++--- 2 files changed, 27 insertions(+), 35 deletions(-) diff --git a/src/TensorFlowNET.Keras/Datasets/Imdb.cs b/src/TensorFlowNET.Keras/Datasets/Imdb.cs index 49fc79251..081c26cb9 100644 --- a/src/TensorFlowNET.Keras/Datasets/Imdb.cs +++ b/src/TensorFlowNET.Keras/Datasets/Imdb.cs @@ -116,23 +116,13 @@ public DatasetPass load_data( for (var i = 0; i < x_train_array.GetLength(0); i++) { new_x_train_array[i, 0] = (int)start_char; - for (var j = 0; j < x_train_array.GetLength(1); j++) - { - if (x_train_array[i, j] == 0) - break; - new_x_train_array[i, j + 1] = 
x_train_array[i, j];
-                }
+                    Array.Copy(x_train_array, i * x_train_array.GetLength(1), new_x_train_array, i * new_x_train_array.GetLength(1) + 1, x_train_array.GetLength(1));
                 }
                 int[,] new_x_test_array = new int[x_test_array.GetLength(0), x_test_array.GetLength(1) + 1];
                 for (var i = 0; i < x_test_array.GetLength(0); i++)
                 {
                     new_x_test_array[i, 0] = (int)start_char;
-                    for (var j = 0; j < x_test_array.GetLength(1); j++)
-                    {
-                        if (x_test_array[i, j] == 0)
-                            break;
-                        new_x_test_array[i, j + 1] = x_test_array[i, j];
-                    }
+                    Array.Copy(x_test_array, i * x_test_array.GetLength(1), new_x_test_array, i * new_x_test_array.GetLength(1) + 1, x_test_array.GetLength(1));
                 }
                 x_train_array = new_x_train_array;
                 x_test_array = new_x_test_array;
@@ -163,15 +153,19 @@ public DatasetPass load_data(
             {
                 maxlen = max(x_train_array.GetLength(1), x_test_array.GetLength(1));
             }
-            (x_train, labels_train) = data_utils._remove_long_seq((int)maxlen, x_train_array, labels_train_array);
-            (x_test, labels_test) = data_utils._remove_long_seq((int)maxlen, x_test_array, labels_test_array);
-            if (x_train.size == 0 || x_test.size == 0)
+            (x_train_array, labels_train_array) = data_utils._remove_long_seq((int)maxlen, x_train_array, labels_train_array);
+            (x_test_array, labels_test_array) = data_utils._remove_long_seq((int)maxlen, x_test_array, labels_test_array);
+            if (x_train_array.Length == 0 || x_test_array.Length == 0)
                 throw new ValueError("After filtering for sequences shorter than maxlen=" +
                     $"{maxlen}, no sequence was kept. Increase maxlen.");
-            var xs = np.concatenate(new[] { x_train, x_test });
-            var labels = np.concatenate(new[] { labels_train, labels_test });
-            var xs_array = (int[,])xs.ToMultiDimArray<int>();
+            int[,] xs_array = new int[x_train_array.GetLength(0) + x_test_array.GetLength(0), (int)maxlen];
+            Array.Copy(x_train_array, xs_array, x_train_array.Length);
+            Array.Copy(x_test_array, 0, xs_array, x_train_array.Length, x_test_array.Length);
+
+            long[] labels_array = new long[labels_train_array.Length + labels_test_array.Length];
+            Array.Copy(labels_train_array, labels_array, labels_train_array.Length);
+            Array.Copy(labels_test_array, 0, labels_array, labels_train_array.Length, labels_test_array.Length);

             if (num_words == null)
             {
@@ -197,7 +191,7 @@ public DatasetPass load_data(
                         new_xs_array[i, j] = (int)oov_char;
                     }
                 }
-                xs = new NDArray(new_xs_array);
+                xs_array = new_xs_array;
             }
             else
             {
@@ -211,19 +205,19 @@ public DatasetPass load_data(
                         new_xs_array[i, k++] = xs_array[i, j];
                     }
                 }
-                xs = new NDArray(new_xs_array);
+                xs_array = new_xs_array;
             }

-            var idx = len(x_train);
-            x_train = xs[$"0:{idx}"];
-            x_test = xs[$"{idx}:"];
-            var y_train = labels[$"0:{idx}"];
-            var y_test = labels[$"{idx}:"];
+            Array.Copy(xs_array, x_train_array, x_train_array.Length);
+            Array.Copy(xs_array, x_train_array.Length, x_test_array, 0, x_test_array.Length);
+
+            Array.Copy(labels_array, labels_train_array, labels_train_array.Length);
+            Array.Copy(labels_array, labels_train_array.Length, labels_test_array, 0, labels_test_array.Length);

             return new DatasetPass
             {
-                Train = (x_train, y_train),
-                Test = (x_test, y_test)
+                Train = (x_train_array, labels_train_array),
+                Test = (x_test_array, labels_test_array)
             };
         }
diff --git a/src/TensorFlowNET.Keras/Utils/data_utils.cs b/src/TensorFlowNET.Keras/Utils/data_utils.cs
index 57ae76695..e6db0ef72 100644
--- a/src/TensorFlowNET.Keras/Utils/data_utils.cs
+++ b/src/TensorFlowNET.Keras/Utils/data_utils.cs
@@ -40,7 +40,7 @@ public static string get_file(string fname, string origin,
             return datadir;
         }

-        public static
(NDArray, NDArray) _remove_long_seq(int maxlen, NDArray seq, NDArray label)
+        public static (int[,], long[]) _remove_long_seq(int maxlen, int[,] seq, long[] label)
         {
             /*Removes sequences that exceed the maximum length.
@@ -56,19 +56,17 @@ public static (NDArray, NDArray) _remove_long_seq(int maxlen, NDArray seq, NDArr
             new_seq, new_label: shortened lists for `seq` and `label`.
             */
             List<int[]> new_seq = new List<int[]>();
             List<long> new_label = new List<long>();

-            var seq_array = (int[,])seq.ToMultiDimArray<int>();
-            var label_array = (long[])label.ToArray<long>();
-            for (var i = 0; i < seq_array.GetLength(0); i++)
+            for (var i = 0; i < seq.GetLength(0); i++)
             {
-                if (maxlen < seq_array.GetLength(1) && seq_array[i,maxlen] != 0)
+                if (maxlen < seq.GetLength(1) && seq[i, maxlen] != 0)
                     continue;
                 int[] sentence = new int[maxlen];
-                for (var j = 0; j < maxlen && j < seq_array.GetLength(1); j++)
+                for (var j = 0; j < maxlen && j < seq.GetLength(1); j++)
                 {
-                    sentence[j] = seq_array[i, j];
+                    sentence[j] = seq[i, j];
                 }
                 new_seq.Add(sentence);
-                new_label.Add(label_array[i]);
+                new_label.Add(label[i]);
             }

             int[,] new_seq_array = new int[new_seq.Count, maxlen];

From 57feb65dbc96fbe383d3dec1cee05bd3f34bb292 Mon Sep 17 00:00:00 2001
From: Alexander Novikov
Date: Fri, 15 Sep 2023 14:57:48 +0000
Subject: [PATCH 19/77] comment IndexedSlices test

---
 .../GradientTest/GradientTest.cs              | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs b/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs
index 3ce6661cc..fc2280051 100644
--- a/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs
+++ b/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs
@@ -488,17 +488,20 @@ public void testNoGradientForStringOutputs()
             //    self.assertTrue(isinstance(grads[0], ops.Tensor))
         }

-        [Ignore("FIXME")]
+        [Ignore("TODO: CompositeTensors are not supported yet.")]
         [TestMethod]
         public void testSingletonIndexedSlices()
         {
             tf.Graph().as_default();

+            // TODO: uncomment when CompositeTensors are supported.
+            /*
             var x = tf.placeholder(TF_DataType.TF_FLOAT);
             var y = tf.identity(x);
             var dy_indices = tf.placeholder(TF_DataType.TF_INT32);
             var dy_values = tf.placeholder(TF_DataType.TF_FLOAT);
-            Tensor dy = new IndexedSlices(dy_values, dy_indices);
+            var dy = new IndexedSlices(dy_values, dy_indices);
+
             var dx = tf.gradients(new[] { y }, new[] { x }, grad_ys: new[] { dy })[0];
             // The IndexedSlices gradient of tf.identity is the identity map.
using (var sess = self.cached_session()) @@ -514,6 +517,7 @@ public void testSingletonIndexedSlices() var vdy = result[1]; self.assertEqual(vdx, vdy); } + */ } From 56e389154cc3252888761b7bb7c931e4dbe88064 Mon Sep 17 00:00:00 2001 From: lingbai-kong Date: Mon, 18 Sep 2023 14:21:09 +0800 Subject: [PATCH 20/77] improve unpickler speed with BufferedStream --- .../NumPy/Implementation/NumPyImpl.Creation.cs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.Creation.cs b/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.Creation.cs index fa4ef0191..c0f9e695d 100644 --- a/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.Creation.cs +++ b/src/TensorFlowNET.Core/NumPy/Implementation/NumPyImpl.Creation.cs @@ -101,9 +101,10 @@ Array ReadValueMatrix(BinaryReader reader, Array matrix, int bytes, Type type, i Array ReadObjectMatrix(BinaryReader reader, Array matrix, int[] shape) { - Stream stream = reader.BaseStream; + Stream deflateStream = reader.BaseStream; + BufferedStream bufferedStream = new BufferedStream(deflateStream); var unpickler = new Unpickler(); - return (MultiArrayPickleWarpper)unpickler.load(stream); + return (MultiArrayPickleWarpper)unpickler.load(bufferedStream); } public (NDArray, NDArray) meshgrid(T[] array, bool copy = true, bool sparse = false) From 725ec1e55f83bae6e4745ddf0605bd15c40fbd92 Mon Sep 17 00:00:00 2001 From: Haiping Chen Date: Mon, 18 Sep 2023 03:05:00 -0500 Subject: [PATCH 21/77] Optimize imdb.load_data --- src/TensorFlowNET.Keras/Datasets/Imdb.cs | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/TensorFlowNET.Keras/Datasets/Imdb.cs b/src/TensorFlowNET.Keras/Datasets/Imdb.cs index 081c26cb9..1c9805189 100644 --- a/src/TensorFlowNET.Keras/Datasets/Imdb.cs +++ b/src/TensorFlowNET.Keras/Datasets/Imdb.cs @@ -180,10 +180,11 @@ public DatasetPass load_data( // 0 (padding), 1 (start), 2 (OOV) if (oov_char != null) { - int[,] new_xs_array = new int[xs_array.GetLength(0), xs_array.GetLength(1)]; - for (var i = 0; i < xs_array.GetLength(0); i++) + var (d1, d2) = (xs_array.GetLength(0), xs_array.GetLength(1)); + int[,] new_xs_array = new int[d1, d2]; + for (var i = 0; i < d1; i++) { - for (var j = 0; j < xs_array.GetLength(1); j++) + for (var j = 0; j < d2; j++) { if (xs_array[i, j] == 0 || skip_top <= xs_array[i, j] && xs_array[i, j] < num_words) new_xs_array[i, j] = xs_array[i, j]; @@ -195,11 +196,12 @@ public DatasetPass load_data( } else { - int[,] new_xs_array = new int[xs_array.GetLength(0), xs_array.GetLength(1)]; - for (var i = 0; i < xs_array.GetLength(0); i++) + var (d1, d2) = (xs_array.GetLength(0), xs_array.GetLength(1)); + int[,] new_xs_array = new int[d1, d2]; + for (var i = 0; i < d1; i++) { int k = 0; - for (var j = 0; j < xs_array.GetLength(1); j++) + for (var j = 0; j < d2; j++) { if (xs_array[i, j] == 0 || skip_top <= xs_array[i, j] && xs_array[i, j] < num_words) new_xs_array[i, k++] = xs_array[i, j]; From 9552d4cb7a51ea0081be027e15645dca11ea1239 Mon Sep 17 00:00:00 2001 From: Wanglongzhi2001 <583087864@qq.com> Date: Thu, 21 Sep 2023 21:54:49 +0800 Subject: [PATCH 22/77] feat: add np.less and np.greater binding --- src/TensorFlowNET.Core/NumPy/Numpy.Math.cs | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/TensorFlowNET.Core/NumPy/Numpy.Math.cs b/src/TensorFlowNET.Core/NumPy/Numpy.Math.cs index 5bc97952b..2559638b3 100644 --- a/src/TensorFlowNET.Core/NumPy/Numpy.Math.cs +++ b/src/TensorFlowNET.Core/NumPy/Numpy.Math.cs @@ -85,5 +85,11 
@@ public static NDArray dot(NDArray x1, NDArray x2, NDArray? axes = null, string?
         [AutoNumPy]
         public static NDArray add(NDArray x, NDArray y) => new NDArray(math_ops.add(x, y));
+
+        [AutoNumPy]
+        public static NDArray greater(NDArray x, NDArray y) => new NDArray(tf.greater(x, y));
+
+        [AutoNumPy]
+        public static NDArray less(NDArray x, NDArray y) => new NDArray(tf.less(x, y));
     }
 }

From f809f6eacee83336ac7971d018686b7ee8999198 Mon Sep 17 00:00:00 2001
From: Wanglongzhi2001 <583087864@qq.com>
Date: Thu, 21 Sep 2023 21:56:22 +0800
Subject: [PATCH 23/77] fix: fix EarlyStopping

---
 .../Callbacks/Earlystopping.cs                | 64 ++++++++++++-------
 1 file changed, 42 insertions(+), 22 deletions(-)

diff --git a/src/TensorFlowNET.Keras/Callbacks/Earlystopping.cs b/src/TensorFlowNET.Keras/Callbacks/Earlystopping.cs
index 36993b637..a2a2ecfe2 100644
--- a/src/TensorFlowNET.Keras/Callbacks/Earlystopping.cs
+++ b/src/TensorFlowNET.Keras/Callbacks/Earlystopping.cs
@@ -19,8 +19,10 @@ public class EarlyStopping: ICallback
     string _monitor;
     string _mode;
     bool _restore_best_weights;
-    List<IVariableV1>? _best_weights;
+    List<NDArray>? _best_weights;
     CallbackParams _parameters;
+    Func<NDArray, NDArray, NDArray> _monitor_op;
+
     public Dictionary<string, List<float>>? history { get; set; }
     // user need to pass a CallbackParams to EarlyStopping, CallbackParams at least need the model
     public EarlyStopping(CallbackParams parameters,string monitor = "val_loss", float min_delta = 0f, int patience = 0,
@@ -38,17 +40,49 @@ public EarlyStopping(CallbackParams parameters,string monitor = "val_loss", floa
         _min_delta = Math.Abs(min_delta);
         _restore_best_weights = restore_best_weights;
         _mode = mode;
-        if (mode != "auto" && mode != "min" && mode != "max")
+
+        if (_mode != "auto" && _mode != "min" && _mode != "max")
+        {
+            Console.WriteLine($"EarlyStopping mode {_mode} is unknown, fallback to auto mode.");
+            _mode = "auto";
+        }
+
+        if (_mode == "min")
+        {
+            _monitor_op = np.less;
+        }
+        else if (_mode == "max")
+        {
+            _monitor_op = np.greater;
+        }
+        else
+        {
+            if (_monitor.EndsWith("acc") || _monitor.EndsWith("accuracy") || _monitor.EndsWith("auc"))
+            {
+                _monitor_op = np.greater;
+            }
+            else
+            {
+                _monitor_op = np.less;
+            }
+        }
+
+        if (_monitor_op == np.greater)
         {
-            Console.WriteLine("EarlyStopping mode %s is unknown, fallback to auto mode.", mode);
+            _min_delta *= 1;
+        }
+        else
+        {
+            _min_delta *= -1;
         }
     }
     public void on_train_begin()
     {
         _wait = 0;
         _stopped_epoch = 0;
+        _best = _monitor_op == np.less ? (float)np.Inf : (float)-np.Inf;
+        _best_weights = null;
         _best_epoch = 0;
-        _best = (float)np.Inf;
     }

     public void on_epoch_begin(int epoch)
@@ -74,7 +108,7 @@ public void on_epoch_end(int epoch, Dictionary<string, float> epoch_logs)
         // Restore the weights after first epoch if no progress is ever made.
         if (_restore_best_weights && _best_weights == null)
         {
-            _best_weights = _parameters.Model.Weights;
+            _best_weights = _parameters.Model.get_weights();
         }
         _wait += 1;
@@ -83,7 +117,7 @@ public void on_epoch_end(int epoch, Dictionary<string, float> epoch_logs)
             _best = current;
             _best_epoch = epoch;
             if (_restore_best_weights)
-                _best_weights = _parameters.Model.TrainableWeights;
+                _best_weights = _parameters.Model.get_weights();
             // Only restart wait if we beat both the baseline and our previous best.
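            // A worked example of the op selected in the constructor, assuming mode
            // "min" with min_delta = 0.01: _min_delta was negated to -0.01, so
            // _is_improvement computes np.less(current + 0.01, best), i.e. the
            // monitored value has to drop by more than min_delta below the best seen
            // so far before it counts as progress.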
            if (_baseline == 0f || _is_improvement(current, _baseline))
                _wait = 0;
@@ -99,7 +133,7 @@ public void on_epoch_end(int epoch, Dictionary<string, float> epoch_logs)
             {
                 Console.WriteLine($"Restoring model weights from the end of the best epoch: {_best_epoch + 1}");
             }
-            _parameters.Model.Weights = _best_weights;
+            _parameters.Model.set_weights(_best_weights);
         }
     }
 }
@@ -131,21 +165,7 @@ float get_monitor_value(Dictionary<string, float> logs)
     }
     public bool _is_improvement(float monitor_value, float reference_value)
     {
-        bool less_op = (monitor_value - _min_delta) < reference_value;
-        bool greater_op = (monitor_value - _min_delta) >= reference_value;
-        if (_mode == "min")
-            return less_op;
-        else if (_mode == "max")
-            return greater_op;
-        else
-        {
-            if (_monitor.EndsWith("acc") || _monitor.EndsWith("accuracy") || _monitor.EndsWith("auc"))
-            {
-                return greater_op;
-            }
-            else
-                return less_op;
-        }
+        return _monitor_op(monitor_value - _min_delta, reference_value);
     }

     public void on_test_end(Dictionary<string, float> logs)

From 9552d4cb7a51ea0081be027e15645dca11ea1239 Mon Sep 17 00:00:00 2001
From: lingbai-kong
Date: Fri, 22 Sep 2023 18:34:08 +0800
Subject: [PATCH 24/77] fix: adjust imdb dataset loader for faster loading
 speed

---
 src/TensorFlowNET.Keras/Datasets/Imdb.cs    | 29 ++++++++++++---------
 src/TensorFlowNET.Keras/Utils/data_utils.cs |  8 +++---
 2 files changed, 22 insertions(+), 15 deletions(-)

diff --git a/src/TensorFlowNET.Keras/Datasets/Imdb.cs b/src/TensorFlowNET.Keras/Datasets/Imdb.cs
index 1c9805189..4d6df913b 100644
--- a/src/TensorFlowNET.Keras/Datasets/Imdb.cs
+++ b/src/TensorFlowNET.Keras/Datasets/Imdb.cs
@@ -112,35 +112,39 @@ public DatasetPass load_data(
             if (start_char != null)
             {
-                int[,] new_x_train_array = new int[x_train_array.GetLength(0), x_train_array.GetLength(1) + 1];
-                for (var i = 0; i < x_train_array.GetLength(0); i++)
+                var (d1, d2) = (x_train_array.GetLength(0), x_train_array.GetLength(1));
+                int[,] new_x_train_array = new int[d1, d2 + 1];
+                for (var i = 0; i < d1; i++)
                 {
                     new_x_train_array[i, 0] = (int)start_char;
-                    Array.Copy(x_train_array, i * x_train_array.GetLength(1), new_x_train_array, i * new_x_train_array.GetLength(1) + 1, x_train_array.GetLength(1));
+                    Array.Copy(x_train_array, i * d2, new_x_train_array, i * (d2 + 1) + 1, d2);
                 }
-                int[,] new_x_test_array = new int[x_test_array.GetLength(0), x_test_array.GetLength(1) + 1];
-                for (var i = 0; i < x_test_array.GetLength(0); i++)
+                (d1, d2) = (x_test_array.GetLength(0), x_test_array.GetLength(1));
+                int[,] new_x_test_array = new int[d1, d2 + 1];
+                for (var i = 0; i < d1; i++)
                 {
                     new_x_test_array[i, 0] = (int)start_char;
-                    Array.Copy(x_test_array, i * x_test_array.GetLength(1), new_x_test_array, i * new_x_test_array.GetLength(1) + 1, x_test_array.GetLength(1));
+                    Array.Copy(x_test_array, i * d2, new_x_test_array, i * (d2 + 1) + 1, d2);
                 }
                 x_train_array = new_x_train_array;
                 x_test_array = new_x_test_array;
             }
             else if (index_from != 0)
             {
-                for (var i = 0; i < x_train_array.GetLength(0); i++)
+                var (d1, d2) = (x_train_array.GetLength(0), x_train_array.GetLength(1));
+                for (var i = 0; i < d1; i++)
                 {
-                    for (var j = 0; j < x_train_array.GetLength(1); j++)
+                    for (var j = 0; j < d2; j++)
                     {
                         if (x_train_array[i, j] == 0)
                             break;
                         x_train_array[i, j] += index_from;
                     }
                 }
-                for (var i = 0; i < x_test_array.GetLength(0); i++)
+                (d1, d2) = (x_test_array.GetLength(0), x_test_array.GetLength(1));
+                for (var i = 0; i < d1; i++)
                 {
-                    for (var j = 0; j < x_test_array.GetLength(1); j++)
+                    for (var j = 0; j < d2; j++)
                     {
                         if (x_test_array[i, j] == 0)
                             break;
@@ -169,9 +173,10 @@ public
DatasetPass load_data( if (num_words == null) { + var (d1, d2) = (xs_array.GetLength(0), xs_array.GetLength(1)); num_words = 0; - for (var i = 0; i < xs_array.GetLength(0); i++) - for (var j = 0; j < xs_array.GetLength(1); j++) + for (var i = 0; i < d1; i++) + for (var j = 0; j < d2; j++) num_words = max((int)num_words, (int)xs_array[i, j]); } diff --git a/src/TensorFlowNET.Keras/Utils/data_utils.cs b/src/TensorFlowNET.Keras/Utils/data_utils.cs index e6db0ef72..b0bc15540 100644 --- a/src/TensorFlowNET.Keras/Utils/data_utils.cs +++ b/src/TensorFlowNET.Keras/Utils/data_utils.cs @@ -53,15 +53,17 @@ public static (int[,], long[]) _remove_long_seq(int maxlen, int[,] seq, long[] l new_seq, new_label: shortened lists for `seq` and `label`. */ + var nRow = seq.GetLength(0); + var nCol = seq.GetLength(1); List new_seq = new List(); List new_label = new List(); - for (var i = 0; i < seq.GetLength(0); i++) + for (var i = 0; i < nRow; i++) { - if (maxlen < seq.GetLength(1) && seq[i, maxlen] != 0) + if (maxlen < nCol && seq[i, maxlen] != 0) continue; int[] sentence = new int[maxlen]; - for (var j = 0; j < maxlen && j < seq.GetLength(1); j++) + for (var j = 0; j < maxlen && j < nCol; j++) { sentence[j] = seq[i, j]; } From eb4c1f4fb01bb02b7c7f87d5bee958bd9d4b0e42 Mon Sep 17 00:00:00 2001 From: Haiping Chen Date: Sat, 23 Sep 2023 20:57:48 -0500 Subject: [PATCH 25/77] Release v0.110.4. --- src/TensorFlowNET.Core/Tensorflow.Binding.csproj | 9 +++++---- src/TensorFlowNET.Keras/Tensorflow.Keras.csproj | 6 +++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/TensorFlowNET.Core/Tensorflow.Binding.csproj b/src/TensorFlowNET.Core/Tensorflow.Binding.csproj index be714618d..85c41bd2a 100644 --- a/src/TensorFlowNET.Core/Tensorflow.Binding.csproj +++ b/src/TensorFlowNET.Core/Tensorflow.Binding.csproj @@ -5,7 +5,7 @@ Tensorflow.Binding Tensorflow 2.11.0 - 0.110.3 + 0.110.4 10.0 enable Haiping Chen, Eli Belash, Yaohui Liu, Meinrad Recheis @@ -25,7 +25,8 @@ https://tensorflownet.readthedocs.io tf.net 0.110.x and above are based on tensorflow native 2.11.0 * Support RNN, LSTM model. * Support Transformer model. - + * Added IMDB dataset. + tf.net 0.100.x and above are based on tensorflow native 2.10.0 * Eager Mode is added finally. @@ -43,7 +44,7 @@ https://tensorflownet.readthedocs.io tf.net 0.10x.x aligns with TensorFlow v2.10.x native library. tf.net 0.11x.x aligns with TensorFlow v2.11.x native library. - 0.110.3.0 + 0.110.4.0 LICENSE true packages @@ -174,7 +175,7 @@ https://tensorflownet.readthedocs.io - + diff --git a/src/TensorFlowNET.Keras/Tensorflow.Keras.csproj b/src/TensorFlowNET.Keras/Tensorflow.Keras.csproj index 36d1bc1d4..a0ee22284 100644 --- a/src/TensorFlowNET.Keras/Tensorflow.Keras.csproj +++ b/src/TensorFlowNET.Keras/Tensorflow.Keras.csproj @@ -7,7 +7,7 @@ enable Tensorflow.Keras AnyCPU;x64 - 0.11.3 + 0.11.4 Haiping Chen Keras for .NET Apache 2.0, Haiping Chen since 2018 @@ -42,8 +42,8 @@ Keras is an API designed for human beings, not machines. 
Keras follows best prac Git False Open.snk - 0.11.3.0 - 0.11.3.0 + 0.11.4.0 + 0.11.4.0 LICENSE Debug;Release;GPU From 21210795d0fb7963c13fb99604b7e7e46df2443d Mon Sep 17 00:00:00 2001 From: Alexander Novikov Date: Wed, 27 Sep 2023 13:16:28 +0000 Subject: [PATCH 26/77] gradient descent tests --- .../Variables/variables.py.cs | 7 +- .../GradientTest/GradientTest.cs | 2 - test/TensorFlowNET.UnitTest/PythonTest.cs | 178 +++++++++++++++++- .../Training/GradientDescentOptimizerTests.cs | 68 +++++++ 4 files changed, 250 insertions(+), 5 deletions(-) create mode 100644 test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs diff --git a/src/TensorFlowNET.Core/Variables/variables.py.cs b/src/TensorFlowNET.Core/Variables/variables.py.cs index 0c07e0243..f3ae248e6 100644 --- a/src/TensorFlowNET.Core/Variables/variables.py.cs +++ b/src/TensorFlowNET.Core/Variables/variables.py.cs @@ -72,7 +72,9 @@ public static List global_variables(string scope = null) public static Operation variables_initializer(IVariableV1[] var_list, string name = "init") { if (var_list.Length > 0) + { return control_flow_ops.group(var_list.Select(x => x.Initializer).ToArray(), name); + } else return gen_control_flow_ops.no_op(name: name); } @@ -155,7 +157,10 @@ public static Operation _safe_initial_value_from_op(string name, Operation op, D public static Tensor global_variables_initializer() { - throw new NotImplementedException(); + // if context.executing_eagerly(): + // return control_flow_ops.no_op(name = "global_variables_initializer") + var group = variables_initializer(global_variables().ToArray()); + return group; } } } diff --git a/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs b/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs index fc2280051..e2d6db912 100644 --- a/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs +++ b/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs @@ -776,8 +776,6 @@ public void testUnconnectedGradientsNoneUnconnectedGradients() [TestMethod] public void testUnconnectedGradientsZerosUnconnectedGradients() { - - //def testUnconnectedGradientsZerosUnconnectedGradients(self): // with ops.Graph().as_default(): // x = constant(1.0, shape=[2, 2]) diff --git a/test/TensorFlowNET.UnitTest/PythonTest.cs b/test/TensorFlowNET.UnitTest/PythonTest.cs index 50cc2b328..12fd72360 100644 --- a/test/TensorFlowNET.UnitTest/PythonTest.cs +++ b/test/TensorFlowNET.UnitTest/PythonTest.cs @@ -144,6 +144,37 @@ public void assertAllClose(double value, NDArray array2, double eps = 1e-5) Assert.IsTrue(np.allclose(array1, array2, rtol: eps)); } + private class CollectionComparer : System.Collections.IComparer + { + private readonly double _epsilon; + + public CollectionComparer(double eps = 1e-06) { + _epsilon = eps; + } + public int Compare(object x, object y) + { + var a = (double)x; + var b = (double)y; + + double delta = Math.Abs(a - b); + if (delta < _epsilon) + { + return 0; + } + return a.CompareTo(b); + } + } + + public void assertAllCloseAccordingToType( + T[] expected, + T[] given, + double eps = 1e-6, + float float_eps = 1e-6f) + { + // TODO: check if any of arguments is not double and change toletance + CollectionAssert.AreEqual(expected, given, new CollectionComparer(eps)); + } + public void assertProtoEquals(object toProto, object o) { throw new NotImplementedException(); @@ -153,6 +184,20 @@ public void assertProtoEquals(object toProto, object o) #region tensor evaluation and test session + private Session _cached_session = null; + private 
Graph _cached_graph = null; + private object _cached_config = null; + private bool _cached_force_gpu = false; + + private void _ClearCachedSession() + { + if (self._cached_session != null) + { + self._cached_session.Dispose(); + self._cached_session = null; + } + } + //protected object _eval_helper(Tensor[] tensors) //{ // if (tensors == null) @@ -218,9 +263,56 @@ public T evaluate(Tensor tensor) } - public Session cached_session() + ///Returns a TensorFlow Session for use in executing tests. + public Session cached_session( + Graph graph = null, object config = null, bool use_gpu = false, bool force_gpu = false) { - throw new NotImplementedException(); + // This method behaves differently than self.session(): for performance reasons + // `cached_session` will by default reuse the same session within the same + // test.The session returned by this function will only be closed at the end + // of the test(in the TearDown function). + + // Use the `use_gpu` and `force_gpu` options to control where ops are run.If + // `force_gpu` is True, all ops are pinned to `/ device:GPU:0`. Otherwise, if + // `use_gpu` is True, TensorFlow tries to run as many ops on the GPU as + // possible.If both `force_gpu and `use_gpu` are False, all ops are pinned to + // the CPU. + + // Example: + // python + // class MyOperatorTest(test_util.TensorFlowTestCase) : + // def testMyOperator(self): + // with self.cached_session() as sess: + // valid_input = [1.0, 2.0, 3.0, 4.0, 5.0] + // result = MyOperator(valid_input).eval() + // self.assertEqual(result, [1.0, 2.0, 3.0, 5.0, 8.0] + // invalid_input = [-1.0, 2.0, 7.0] + // with self.assertRaisesOpError("negative input not supported"): + // MyOperator(invalid_input).eval() + + + // Args: + // graph: Optional graph to use during the returned session. + // config: An optional config_pb2.ConfigProto to use to configure the + // session. + // use_gpu: If True, attempt to run as many ops as possible on GPU. + // force_gpu: If True, pin all ops to `/device:GPU:0`. + + // Yields: + // A Session object that should be used as a context manager to surround + // the graph building and execution code in a test case. + + + // TODO: + // if context.executing_eagerly(): + // return self._eval_helper(tensors) + // else: + { + var sess = self._get_cached_session( + graph, config, force_gpu, crash_if_inconsistent_args: true); + using var cached = self._constrain_devices_and_set_default(sess, use_gpu, force_gpu); + return cached; + } } //Returns a TensorFlow Session for use in executing tests. @@ -268,6 +360,40 @@ public Session session(Graph graph = null, object config = null, bool use_gpu = return s.as_default(); } + private Session _constrain_devices_and_set_default(Session sess, bool use_gpu, bool force_gpu) + { + // Set the session and its graph to global default and constrain devices.""" + if (tf.executing_eagerly()) + return null; + else + { + sess.graph.as_default(); + sess.as_default(); + { + if (force_gpu) + { + // TODO: + + // Use the name of an actual device if one is detected, or + // '/device:GPU:0' otherwise + /* var gpu_name = gpu_device_name(); + if (!gpu_name) + gpu_name = "/device:GPU:0" + using (sess.graph.device(gpu_name)) { + yield return sess; + }*/ + return sess; + } + else if (use_gpu) + return sess; + else + using (sess.graph.device("/device:CPU:0")) + return sess; + } + + } + } + // See session() for details. 
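+        // A typical use in a test body (a sketch mirroring the Python docstring
+        // above; `someOp` is an illustrative placeholder, not a real member):
+        //
+        //     using (var sess = self.cached_session())
+        //     {
+        //         var result = sess.run(someOp);
+        //         // ... assert on `result`; the session is reused across the test
+        //         // and disposed by the Cleanup handler below ...
+        //     }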
private Session _create_session(Graph graph, object cfg, bool forceGpu)
         {
@@ -312,6 +438,54 @@ private Session _create_session(Graph graph, object cfg, bool forceGpu)
             return new Session(graph);//, config = prepare_config(config))
         }

+        private Session _get_cached_session(
+            Graph graph = null,
+            object config = null,
+            bool force_gpu = false,
+            bool crash_if_inconsistent_args = true)
+        {
+            // See cached_session() for documentation.
+            if (self._cached_session == null)
+            {
+                var sess = self._create_session(graph, config, force_gpu);
+                self._cached_session = sess;
+                self._cached_graph = graph;
+                self._cached_config = config;
+                self._cached_force_gpu = force_gpu;
+                return sess;
+            }
+            else
+            {
+
+                if (crash_if_inconsistent_args && !self._cached_graph.Equals(graph))
+                    throw new ValueError(@"The graph used to get the cached session is
+                        different than the one that was used to create the
+                        session. Maybe create a new session with
+                        self.session()");
+                if (crash_if_inconsistent_args && !self._cached_config.Equals(config))
+                {
+                    throw new ValueError(@"The config used to get the cached session is
+                        different than the one that was used to create the
+                        session. Maybe create a new session with
+                        self.session()");
+                }
+                if (crash_if_inconsistent_args && !self._cached_force_gpu.Equals(force_gpu))
+                {
+                    throw new ValueError(@"The force_gpu value used to get the cached session is
+                        different than the one that was used to create the
+                        session. Maybe create a new session with
+                        self.session()");
+                }
+                return _cached_session;
+            }
+        }
+
+        [TestCleanup]
+        public void Cleanup()
+        {
+            _ClearCachedSession();
+        }
+
         #endregion

         public void AssetSequenceEqual<T>(T[] a, T[] b)
diff --git a/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs b/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs
new file mode 100644
index 000000000..977544ae9
--- /dev/null
+++ b/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs
@@ -0,0 +1,68 @@
+using Microsoft.VisualStudio.TestTools.UnitTesting;
+using System;
+using System.Linq;
+using System.Runtime.Intrinsics.X86;
+using System.Security.AccessControl;
+using Tensorflow.NumPy;
+using TensorFlowNET.UnitTest;
+using static Tensorflow.Binding;
+
+namespace Tensorflow.Keras.UnitTest.Optimizers
+{
+    [TestClass]
+    public class GradientDescentOptimizerTest : PythonTest
+    {
+        private void TestBasicGeneric<T>() where T : struct
+        {
+            var dtype = Type.GetTypeCode(typeof(T)) switch
+            {
+                TypeCode.Single => np.float32,
+                TypeCode.Double => np.float64,
+                _ => throw new NotImplementedException(),
+            };
+
+            // train.GradientDescentOptimizer is V1 only API.
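+            // The update rule under test is plain SGD, w_new = w - lr * g. With
+            // lr = 3.0 the expected values are var0 = [1.0, 2.0] - 3.0 * [0.1, 0.1]
+            // = [0.7, 1.7] and var1 = [3.0, 4.0] - 3.0 * [0.01, 0.01] = [2.97, 3.97],
+            // which is exactly what the assertions below spell out.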
+            tf.Graph().as_default();
+            using (self.cached_session())
+            {
+                var var0 = tf.Variable(new[] { 1.0, 2.0 }, dtype: dtype);
+                var var1 = tf.Variable(new[] { 3.0, 4.0 }, dtype: dtype);
+                var grads0 = tf.constant(new[] { 0.1, 0.1 }, dtype: dtype);
+                var grads1 = tf.constant(new[] { 0.01, 0.01 }, dtype: dtype);
+                var optimizer = tf.train.GradientDescentOptimizer(3.0f);
+                var grads_and_vars = new[] {
+                    Tuple.Create(grads0, var0 as IVariableV1),
+                    Tuple.Create(grads1, var1 as IVariableV1)
+                };
+                var sgd_op = optimizer.apply_gradients(grads_and_vars);
+
+                var global_variables = variables.global_variables_initializer();
+                self.evaluate(global_variables);
+                // Fetch params to validate initial values
+                // TODO: use self.evaluate instead of self.evaluate
+                self.assertAllCloseAccordingToType(new double[] { 1.0, 2.0 }, self.evaluate<double[]>(var0));
+                self.assertAllCloseAccordingToType(new double[] { 3.0, 4.0 }, self.evaluate<double[]>(var1));
+                // Run 1 step of sgd
+                sgd_op.run();
+                // Validate updated params
+                self.assertAllCloseAccordingToType(
+                    new double[] { 1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1 },
+                    self.evaluate<double[]>(var0));
+                self.assertAllCloseAccordingToType(
+                    new double[] { 3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01 },
+                    self.evaluate<double[]>(var1));
+                // TODO: self.assertEqual(0, len(optimizer.variables()));
+            }
+        }
+
+        [TestMethod]
+        public void TestBasic()
+        {
+            //TODO: add np.half
+            TestBasicGeneric<float>();
+            TestBasicGeneric<double>();
+        }
+
+
+    }
+}

From 02bfb9af176c13e8c37fe42ce600f4600ab8938d Mon Sep 17 00:00:00 2001
From: Beacontownfc <19636977267@qq.com>
Date: Thu, 28 Sep 2023 15:22:13 +0000
Subject: [PATCH 27/77] improve raggedtensor

---
 .../Operations/array_ops.cs                   | 13 +++++
 .../Tensors/Ragged/RaggedTensor.cs            | 33 +++++++++++
 .../Tensors/Ragged/RowPartition.cs            | 55 +++++++++++++++++++
 .../ManagedAPI/RaggedTensorTest.cs            | 26 +++++++++
 4 files changed, 127 insertions(+)
 create mode 100644 test/TensorFlowNET.UnitTest/ManagedAPI/RaggedTensorTest.cs

diff --git a/src/TensorFlowNET.Core/Operations/array_ops.cs b/src/TensorFlowNET.Core/Operations/array_ops.cs
index f80dcd2c4..fdc53cd7e 100644
--- a/src/TensorFlowNET.Core/Operations/array_ops.cs
+++ b/src/TensorFlowNET.Core/Operations/array_ops.cs
@@ -1139,5 +1139,18 @@ public static Tensor placeholder(TF_DataType dtype, Shape shape = null, string n
             var _op = tf.OpDefLib._apply_op_helper("Placeholder", name: name, args: new { dtype, shape });
             return _op.output;
         }
+
+        public static int get_positive_axis(int axis, int ndims = -100, string axis_name = "axis", string ndims_name = "ndims")
+        {
+            if (ndims != -100)
+            {
+                if (axis >= 0 && axis < ndims) return axis;
+                else if (-ndims <= axis && axis < 0) return axis + ndims;
+                else throw new ValueError($"{axis_name}={axis} out of bounds: expected {-ndims}<={axis_name}<{ndims}");
+
+            } else if (axis < 0) throw new ValueError($"{axis_name}={axis} may only be negative if {ndims_name} is statically known.");
+            return axis;
+        }
+
     }
 }
diff --git a/src/TensorFlowNET.Core/Tensors/Ragged/RaggedTensor.cs b/src/TensorFlowNET.Core/Tensors/Ragged/RaggedTensor.cs
index 4f85e1081..0f09d4128 100644
--- a/src/TensorFlowNET.Core/Tensors/Ragged/RaggedTensor.cs
+++ b/src/TensorFlowNET.Core/Tensors/Ragged/RaggedTensor.cs
@@ -163,5 +163,38 @@ public static implicit operator RaggedTensor(Tensor tensor)
         {
             return tensor.Tag as RaggedTensor;
         }
+        public Tensor nrows(TF_DataType out_type, string name = null)
+        {
+            return tf_with(ops.name_scope(name, "RaggedNRows"), scope =>
+            {
+                return math_ops.cast(this._row_partition.nrows(), dtype: out_type);
+            });
+        }
+        public RaggedTensor
row_lengths(int axis=-1, string name=null) + { + if (axis == 0) return this._row_partition.nrows(); + if (axis == 1) return this._row_partition.row_lengths(); + var values = (RaggedTensor)this._values; + axis = array_ops.get_positive_axis( + axis, this.shape.rank, ndims_name: "rank(this)"); + if (axis == 0) return this.nrows(this._row_partition.GetDataType()); + else if (axis == 1) + { + var splits = this._row_partition.row_splits; + return splits[new Slice(start: 1)] - splits[new Slice(stop: -1)]; + + } + else if (this._values is RaggedTensor) + { + return values.row_lengths(axis - 1); + } + else + { + var shape = array_ops.shape(values, out_type: this._row_partition.GetDataType()); + return array_ops.ones(shape[new Slice(stop:axis - 1)], this._row_partition.GetDataType()) * + shape[axis - 1]; + } + } } } diff --git a/src/TensorFlowNET.Core/Tensors/Ragged/RowPartition.cs b/src/TensorFlowNET.Core/Tensors/Ragged/RowPartition.cs index 29dc525df..9e242ff38 100644 --- a/src/TensorFlowNET.Core/Tensors/Ragged/RowPartition.cs +++ b/src/TensorFlowNET.Core/Tensors/Ragged/RowPartition.cs @@ -14,10 +14,15 @@ You may obtain a copy of the License at limitations under the License. ******************************************************************************/ +using Serilog.Debugging; using System; +using System.Collections.Concurrent; using System.Collections.Generic; +//using System.ComponentModel.DataAnnotations; using System.Text; +using System.Xml.Linq; using Tensorflow.Framework; +using Tensorflow.NumPy; using static Tensorflow.Binding; namespace Tensorflow @@ -99,5 +104,55 @@ public static RowPartition from_row_splits(Tensor row_splits, return new RowPartition(row_splits); }); } + + public static RowPartition from_row_lengths(Tensor row_lengths, + bool validate=true, + TF_DataType dtype = TF_DataType.TF_INT32, + TF_DataType dtype_hint= TF_DataType.TF_INT32) + { + row_lengths = _convert_row_partition( + row_lengths, "row_lengths", dtype_hint: dtype_hint, dtype: dtype); + Tensor row_limits = math_ops.cumsum(row_lengths, tf.constant(-1)); + Tensor row_splits = array_ops.concat(new Tensor[] { tf.convert_to_tensor(np.array(new int[] { 0 }, TF_DataType.TF_INT64)), row_limits }, axis:0); + return new RowPartition(row_splits: row_splits, row_lengths: row_lengths); + } + + public static Tensor _convert_row_partition(Tensor partition, string name, TF_DataType dtype, + TF_DataType dtype_hint= TF_DataType.TF_INT64) + { + if (partition is NDArray && partition.GetDataType() == np.int32) partition = ops.convert_to_tensor(partition, name: name); + if (partition.GetDataType() != np.int32 && partition.GetDataType() != np.int64) throw new ValueError($"{name} must have dtype int32 or int64"); + return partition; + } + + public Tensor nrows() + { + /*Returns the number of rows created by this `RowPartition*/ + if (this._nrows != null) return this._nrows; + var nsplits = tensor_shape.dimension_at_index(this._row_splits.shape, 0); + if (nsplits == null) return array_ops.shape(this._row_splits, out_type: this.row_splits.dtype)[0] - 1; + else return constant_op.constant(nsplits.value - 1, dtype: this.row_splits.dtype); + } + + public Tensor row_lengths() + { + + if (this._row_splits != null) + { + int nrows_plus_one = tensor_shape.dimension_value(this._row_splits.shape[0]); + return tf.constant(nrows_plus_one - 1); + + } + if (this._row_lengths != null) + { + var nrows = tensor_shape.dimension_value(this._row_lengths.shape[0]); + return tf.constant(nrows); + } + if(this._nrows != null) + { + return 
tensor_util.constant_value(this._nrows);
+            }
+            return tf.constant(-1);
+        }
     }
 }
diff --git a/test/TensorFlowNET.UnitTest/ManagedAPI/RaggedTensorTest.cs b/test/TensorFlowNET.UnitTest/ManagedAPI/RaggedTensorTest.cs
new file mode 100644
index 000000000..7a3de882e
--- /dev/null
+++ b/test/TensorFlowNET.UnitTest/ManagedAPI/RaggedTensorTest.cs
@@ -0,0 +1,26 @@
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+using System.Threading.Tasks;
+using Microsoft.VisualStudio.TestTools.UnitTesting;
+using Tensorflow;
+using Tensorflow.NumPy;
+using static Tensorflow.Binding;
+
+namespace TensorFlowNET.UnitTest.ManagedAPI
+{
+    [TestClass]
+    public class RaggedTensorTest : EagerModeTestBase
+    {
+        [TestMethod]
+        public void Test_from_row_lengths()
+        {
+            var row_lengths = tf.convert_to_tensor(np.array(new int[] { 2, 0, 3, 1, 1 }, TF_DataType.TF_INT64));
+            var rp = RowPartition.from_row_lengths(row_lengths, validate: false);
+            var rp_row_lengths = rp.row_lengths();
+            var rp_nrows = rp.nrows();
+            // [2, 0, 3, 1, 1] describes five rows, so nrows() should evaluate to 5.
+            Assert.IsTrue(rp_nrows.ToArray<long>()[0] == 5);
+
+        }
+    }
+}

From f5af07ce5efc938686c897db57f0a33ec371adec Mon Sep 17 00:00:00 2001
From: Wanglongzhi2001 <583087864@qq.com>
Date: Mon, 2 Oct 2023 00:23:56 +0800
Subject: [PATCH 28/77] feat: add the implementation of sample_weight in
 model.fit

---
 .../Keras/ArgsDefinition/DataAdapterArgs.cs   |   3 +
 .../Keras/ArgsDefinition/DataHandlerArgs.cs   |   3 +
 src/TensorFlowNET.Core/Keras/Engine/IModel.cs |  11 +-
 src/TensorFlowNET.Core/Util/Data.cs           |  66 ++++++++++
 .../Engine/DataAdapters/DataAdapter.cs        |  59 ++++++++
 .../Engine/DataAdapters/DataHandler.cs        |   3 +
 .../Engine/DataAdapters/IDataAdapter.cs       |   2 +
 .../DataAdapters/TensorLikeDataAdapter.cs     |   7 +-
 .../Engine/LossesContainer.cs                 |   4 +-
 .../Engine/Model.Evaluate.cs                  |  19 ++-
 src/TensorFlowNET.Keras/Engine/Model.Fit.cs   | 129 ++++++------------
 src/TensorFlowNET.Keras/Engine/Model.Train.cs |  40 +++++-
 .../Layers/Rnn.Test.cs                        |   4 +-
 13 files changed, 250 insertions(+), 100 deletions(-)
 create mode 100644 src/TensorFlowNET.Core/Util/Data.cs

diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/DataAdapterArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/DataAdapterArgs.cs
index 78882e82d..ba0332836 100644
--- a/src/TensorFlowNET.Core/Keras/ArgsDefinition/DataAdapterArgs.cs
+++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/DataAdapterArgs.cs
@@ -1,5 +1,6 @@
 using Tensorflow.Keras.Engine;
 using Tensorflow.Keras.Saving;
+using Tensorflow.NumPy;

 namespace Tensorflow.Keras.ArgsDefinition
 {
@@ -16,5 +17,7 @@ public class DataAdapterArgs: IKerasConfig
         public int Worker { get; set; }
         public bool UseMultiprocessing { get; set; }
         public IModel Model { get; set; }
+        public Dictionary<int, float> ClassWeight = null;
+        public NDArray SampleWeight = null;
     }
 }
diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/DataHandlerArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/DataHandlerArgs.cs
index 82530e950..72d0bb811 100644
--- a/src/TensorFlowNET.Core/Keras/ArgsDefinition/DataHandlerArgs.cs
+++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/DataHandlerArgs.cs
@@ -1,5 +1,6 @@
 using Tensorflow.Keras.Engine;
 using Tensorflow.Keras.Saving;
+using Tensorflow.NumPy;

 namespace Tensorflow.Keras.ArgsDefinition
 {
@@ -18,5 +19,7 @@ public class DataHandlerArgs: IKerasConfig
         public bool UseMultiprocessing { get; set; } = false;
         public IModel Model { get; set; }
         public IVariableV1 StepsPerExecution { get; set; }
+        public Dictionary<int, float> ClassWeight = null;
+        public NDArray SampleWeight = null;
     }
 }
diff --git
a/src/TensorFlowNET.Core/Keras/Engine/IModel.cs b/src/TensorFlowNET.Core/Keras/Engine/IModel.cs
index 19f3df9ba..1840f88b9 100644
--- a/src/TensorFlowNET.Core/Keras/Engine/IModel.cs
+++ b/src/TensorFlowNET.Core/Keras/Engine/IModel.cs
@@ -3,6 +3,7 @@ using Tensorflow.Keras.Metrics;
 using Tensorflow.Keras.Saving;
 using Tensorflow.NumPy;
+using Tensorflow.Util;

 namespace Tensorflow.Keras.Engine;

@@ -22,8 +23,10 @@ ICallback fit(NDArray x, NDArray y,
         int verbose = 1,
         List<ICallback> callbacks = null,
         float validation_split = 0f,
-        (NDArray val_x, NDArray val_y)? validation_data = null,
+        ValidationDataPack validation_data = null,
         bool shuffle = true,
+        Dictionary<int, float> class_weight = null,
+        NDArray sample_weight = null,
         int initial_epoch = 0,
         int max_queue_size = 10,
         int workers = 1,
@@ -35,8 +38,10 @@ ICallback fit(IEnumerable<NDArray> x, NDArray y,
         int verbose = 1,
         List<ICallback> callbacks = null,
         float validation_split = 0f,
-        (IEnumerable<NDArray> val_x, NDArray val_y)? validation_data = null,
+        ValidationDataPack validation_data = null,
         bool shuffle = true,
+        Dictionary<int, float> class_weight = null,
+        NDArray sample_weight = null,
         int initial_epoch = 0,
         int max_queue_size = 10,
         int workers = 1,
@@ -63,6 +68,8 @@ void load_weights(string filepath,
     Dictionary<string, float> evaluate(NDArray x, NDArray y,
         int batch_size = -1,
         int verbose = 1,
+        NDArray sample_weight = null,
+
         int steps = -1,
         int max_queue_size = 10,
         int workers = 1,
diff --git a/src/TensorFlowNET.Core/Util/Data.cs b/src/TensorFlowNET.Core/Util/Data.cs
new file mode 100644
index 000000000..a14c69b18
--- /dev/null
+++ b/src/TensorFlowNET.Core/Util/Data.cs
@@ -0,0 +1,66 @@
+using Tensorflow.NumPy;
+
+namespace Tensorflow.Util
+{
+    /// <summary>
+    /// ValidationDataPack is used to pass validation data to the fit method.
+    /// It can receive data which could be a tuple `(x_val, y_val)` or `(x_val, y_val, sample_weight_val)` of Numpy arrays.
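+    /// A usage sketch (the implicit conversions are defined below; the names here
+    /// are illustrative):
+    ///     ValidationDataPack pack = (x_val, y_val);
+    ///     ValidationDataPack weighted = (x_val, y_val, sample_weight_val);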
+ /// + public class ValidationDataPack + { + public NDArray val_x; + public NDArray val_y; + public NDArray val_sample_weight = null; + + public ValidationDataPack((NDArray, NDArray) validation_data) + { + this.val_x = validation_data.Item1; + this.val_y = validation_data.Item2; + } + + public ValidationDataPack((NDArray, NDArray, NDArray) validation_data) + { + this.val_x = validation_data.Item1; + this.val_y = validation_data.Item2; + this.val_sample_weight = validation_data.Item3; + } + + public ValidationDataPack((IEnumerable, NDArray) validation_data) + { + this.val_x = validation_data.Item1.ToArray()[0]; + this.val_y = validation_data.Item2; + } + + public ValidationDataPack((IEnumerable, NDArray, NDArray) validation_data) + { + this.val_x = validation_data.Item1.ToArray()[0]; + this.val_y = validation_data.Item2; + this.val_sample_weight = validation_data.Item3; + } + + public static implicit operator ValidationDataPack((NDArray, NDArray) validation_data) + => new ValidationDataPack(validation_data); + + public static implicit operator ValidationDataPack((NDArray, NDArray, NDArray) validation_data) + => new ValidationDataPack(validation_data); + + public static implicit operator ValidationDataPack((IEnumerable, NDArray) validation_data) + => new ValidationDataPack(validation_data); + + public static implicit operator ValidationDataPack((IEnumerable, NDArray, NDArray) validation_data) + => new ValidationDataPack(validation_data); + + public void Deconstruct(out NDArray val_x, out NDArray val_y) + { + val_x = this.val_x; + val_y = this.val_y; + } + + public void Deconstruct(out NDArray val_x, out NDArray val_y, out NDArray val_sample_weight) + { + val_x = this.val_x; + val_y = this.val_y; + val_sample_weight = this.val_sample_weight; + } + } +} diff --git a/src/TensorFlowNET.Keras/Engine/DataAdapters/DataAdapter.cs b/src/TensorFlowNET.Keras/Engine/DataAdapters/DataAdapter.cs index 6c7d53b2f..b2750496a 100644 --- a/src/TensorFlowNET.Keras/Engine/DataAdapters/DataAdapter.cs +++ b/src/TensorFlowNET.Keras/Engine/DataAdapters/DataAdapter.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using System.Text; using Tensorflow.Keras.ArgsDefinition; +using Tensorflow.Util; namespace Tensorflow.Keras.Engine.DataAdapters { @@ -34,9 +35,67 @@ public virtual (Tensors, Tensors) Expand1d(Tensors x, Tensors y) return (x, y); } + public virtual (Tensors, Tensors, Tensors) Expand1d(Tensors x, Tensors y, Tensors sample_weight) + { + for (int i = 0; i < x.Length; i++) + { + if (x[i].shape.ndim == 1) + x[i] = array_ops.expand_dims(x[i], axis: -1); + } + for (int i = 0; i < y.Length; i++) + { + if (y[i].shape.ndim == 1) + y[i] = array_ops.expand_dims(y[i], axis: -1); + } + for (int i = 0; i < sample_weight.Length; i++) + { + if (sample_weight[i].shape.ndim == 1) + sample_weight[i] = array_ops.expand_dims(sample_weight[i], axis: -1); + } + return (x, y, sample_weight); + } + public virtual bool ShouldRecreateIterator() { return true; } + + public static ((NDArray, NDArray, NDArray),ValidationDataPack) train_validation_split((NDArray, NDArray, NDArray) x_y_sample_weight, float validation_split) + { + var x = x_y_sample_weight.Item1; + var y = x_y_sample_weight.Item2; + var sample_weight = x_y_sample_weight.Item3; + int train_count = Convert.ToInt32(x.dims[0] * (1 - validation_split)); + var train_x = x[new Slice(0, train_count)]; + var train_y = y[new Slice(0, train_count)]; + ValidationDataPack validation_data; + if (sample_weight != null) + { + validation_data = (x[new Slice(train_count)], y[new 
+                sample_weight = sample_weight[new Slice(0, train_count)];
+            }
+            else
+            {
+                validation_data = (x[new Slice(train_count)], y[new Slice(train_count)]);
+            }
+
+            return ((train_x, train_y, sample_weight), validation_data);
+        }
+
+        public static ((IEnumerable<NDArray>, NDArray, NDArray), ValidationDataPack) train_validation_split((IEnumerable<NDArray>, NDArray, NDArray) x_y_sample_weight, float validation_split)
+        {
+            var x = x_y_sample_weight.Item1;
+            var y = x_y_sample_weight.Item2;
+            var sample_weight = x_y_sample_weight.Item3;
+            int train_count = Convert.ToInt32(y.dims[0] * (1 - validation_split));
+            var train_x = x.Select(xi => xi[new Slice(0, train_count)] as NDArray);
+            var train_y = y[new Slice(0, train_count)];
+            var val_x = x.Select(xi => xi[new Slice(train_count)] as NDArray);
+            var val_y = y[new Slice(train_count)];
+            NDArray tmp_sample_weight = sample_weight;
+            ValidationDataPack validation_data;
+            // guard against a null sample_weight, mirroring the NDArray overload above
+            if (sample_weight != null)
+            {
+                sample_weight = sample_weight[new Slice(0, train_count)];
+                validation_data = (val_x, val_y, tmp_sample_weight[new Slice(train_count)]);
+            }
+            else
+            {
+                validation_data = (val_x, val_y);
+            }
+            return ((train_x, train_y, sample_weight), validation_data);
+        }
     }
 }
diff --git a/src/TensorFlowNET.Keras/Engine/DataAdapters/DataHandler.cs b/src/TensorFlowNET.Keras/Engine/DataAdapters/DataHandler.cs
index 4723222f2..a5ee75c93 100644
--- a/src/TensorFlowNET.Keras/Engine/DataAdapters/DataHandler.cs
+++ b/src/TensorFlowNET.Keras/Engine/DataAdapters/DataHandler.cs
@@ -2,6 +2,7 @@
 using System.Collections.Generic;
 using Tensorflow.Keras.ArgsDefinition;
 using static Tensorflow.Binding;
+using Tensorflow.Keras.Utils;

 namespace Tensorflow.Keras.Engine.DataAdapters
 {
@@ -28,6 +29,7 @@ public class DataHandler
         public DataHandler(DataHandlerArgs args)
         {
             this.args = args;
+
             if (args.StepsPerExecution == null)
             {
                 _steps_per_execution = tf.Variable(1L);
@@ -48,6 +50,7 @@ public DataHandler(DataHandlerArgs args)
                 BatchSize = args.BatchSize,
                 Steps = args.StepsPerEpoch,
                 Epochs = args.Epochs - args.InitialEpoch,
+                SampleWeight = args.SampleWeight,
                 Shuffle = args.Shuffle,
                 MaxQueueSize = args.MaxQueueSize,
                 Worker = args.Workers,
diff --git a/src/TensorFlowNET.Keras/Engine/DataAdapters/IDataAdapter.cs b/src/TensorFlowNET.Keras/Engine/DataAdapters/IDataAdapter.cs
index 4bdc49795..bb71b0a2d 100644
--- a/src/TensorFlowNET.Keras/Engine/DataAdapters/IDataAdapter.cs
+++ b/src/TensorFlowNET.Keras/Engine/DataAdapters/IDataAdapter.cs
@@ -17,6 +17,8 @@ public interface IDataAdapter
         IDatasetV2 GetDataset();
         int GetSize();
         (Tensors, Tensors) Expand1d(Tensors x, Tensors y);
+        (Tensors, Tensors, Tensors) Expand1d(Tensors x, Tensors y, Tensors sample_weight);
+
         bool ShouldRecreateIterator();
     }
 }
diff --git a/src/TensorFlowNET.Keras/Engine/DataAdapters/TensorLikeDataAdapter.cs b/src/TensorFlowNET.Keras/Engine/DataAdapters/TensorLikeDataAdapter.cs
index 16e646a35..978a3f51c 100644
--- a/src/TensorFlowNET.Keras/Engine/DataAdapters/TensorLikeDataAdapter.cs
+++ b/src/TensorFlowNET.Keras/Engine/DataAdapters/TensorLikeDataAdapter.cs
@@ -20,7 +20,7 @@ public class TensorLikeDataAdapter : DataAdapter, IDataAdapter
         public TensorLikeDataAdapter(DataAdapterArgs args)
         {
             this.args = args;
-            _process_tensorlike();
+            Tensor sample_weight_tensor = args.SampleWeight != null ? _process_tensorlike(args.SampleWeight) : null;
             num_samples = (int)args.X.shape[0];
             var batch_size = args.BatchSize == -1 ?
32 : args.BatchSize; _batch_size = batch_size; @@ -37,6 +37,8 @@ public TensorLikeDataAdapter(DataAdapterArgs args) inputs.AddRange(args.X); if (args.Y != null) inputs.AddRange(args.Y); + if (sample_weight_tensor != null) + inputs.Add(sample_weight_tensor); dataset = slice_inputs(indices_dataset, inputs); dataset.FirstInputTensorCount = args.X.Length; } @@ -94,8 +96,9 @@ IDatasetV2 slice_inputs(IDatasetV2 indices_dataset, Tensors elements) public override bool ShouldRecreateIterator() => false; - void _process_tensorlike() + Tensor _process_tensorlike(NDArray sample_weights) { + return tf.convert_to_tensor(sample_weights); } } } diff --git a/src/TensorFlowNET.Keras/Engine/LossesContainer.cs b/src/TensorFlowNET.Keras/Engine/LossesContainer.cs index 6a91450de..c06fca593 100644 --- a/src/TensorFlowNET.Keras/Engine/LossesContainer.cs +++ b/src/TensorFlowNET.Keras/Engine/LossesContainer.cs @@ -26,11 +26,11 @@ public LossesContainer(ILossFunc losses, string[] output_names = null) /// /// /// - public Tensor Call(Tensor y_true, Tensor y_pred) + public Tensor Call(Tensor y_true, Tensor y_pred, Tensor sample_weight = null) { if (!_built) Build(y_pred); - var loss_value = _losses.Call(y_true, y_pred); + var loss_value = _losses.Call(y_true, y_pred, sample_weight:sample_weight); var loss_metric_value = loss_value; var batch_dim = array_ops.shape(y_true)[0]; diff --git a/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs b/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs index a74a77f18..626d7fcad 100644 --- a/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs +++ b/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs @@ -30,6 +30,7 @@ public partial class Model public Dictionary evaluate(NDArray x, NDArray y, int batch_size = -1, int verbose = 1, + NDArray sample_weight = null, int steps = -1, int max_queue_size = 10, int workers = 1, @@ -51,6 +52,7 @@ public Dictionary evaluate(NDArray x, NDArray y, StepsPerEpoch = steps, InitialEpoch = 0, Epochs = 1, + SampleWeight = sample_weight, MaxQueueSize = max_queue_size, Workers = workers, UseMultiprocessing = use_multiprocessing, @@ -140,7 +142,8 @@ Dictionary evaluate(DataHandler data_handler, CallbackList callba Dictionary test_function(DataHandler data_handler, OwnedIterator iterator) { var data = iterator.next(); - var outputs = test_step(data_handler, data[0], data[1]); + var outputs = data.Length == 2 ? test_step(data_handler, data[0], data[1]) : + test_step(data_handler, data[0], data[1], data[2]); tf_with(ops.control_dependencies(new object[0]), ctl => _test_counter.assign_add(1)); return outputs; } @@ -149,17 +152,23 @@ Dictionary test_step_multi_inputs_function(DataHandler data_handl { var data = iterator.next(); var x_size = data_handler.DataAdapter.GetDataset().FirstInputTensorCount; - var outputs = test_step(data_handler, data.Take(x_size).ToArray(), data.Skip(x_size).ToArray()); + var outputs = data.Length == 2 ? 
+                test_step(data_handler, new Tensors(data.Take(x_size).ToArray()), new Tensors(data.Skip(x_size).ToArray())) :
+                test_step(
+                    data_handler,
+                    new Tensors(data.Take(x_size).ToArray()),
+                    new Tensors(data.Skip(x_size).Take(x_size).ToArray()),
+                    new Tensors(data.Skip(2 * x_size).ToArray()));
             tf_with(ops.control_dependencies(new object[0]), ctl => _test_counter.assign_add(1));
             return outputs;
         }

-        Dictionary<string, float> test_step(DataHandler data_handler, Tensors x, Tensors y)
+        Dictionary<string, float> test_step(DataHandler data_handler, Tensors x, Tensors y, Tensors sample_weight = null)
         {
-            (x, y) = data_handler.DataAdapter.Expand1d(x, y);
+            (x, y, sample_weight) = data_handler.DataAdapter.Expand1d(x, y, sample_weight);
             var y_pred = Apply(x, training: false);
-            var loss = compiled_loss.Call(y, y_pred);
+            var loss = compiled_loss.Call(y, y_pred, sample_weight: sample_weight);
             compiled_metrics.update_state(y, y_pred);
             return metrics.Select(x => (x.Name, x.result())).ToDictionary(x => x.Item1, x => (float)x.Item2);
         }
diff --git a/src/TensorFlowNET.Keras/Engine/Model.Fit.cs b/src/TensorFlowNET.Keras/Engine/Model.Fit.cs
index d6f89d8be..23c53b707 100644
--- a/src/TensorFlowNET.Keras/Engine/Model.Fit.cs
+++ b/src/TensorFlowNET.Keras/Engine/Model.Fit.cs
@@ -6,10 +6,12 @@
 using Tensorflow.Keras.Engine.DataAdapters;
 using System.Diagnostics;
 using Tensorflow.Keras.Callbacks;
-using System.Data;
+using Tensorflow.Util;

 namespace Tensorflow.Keras.Engine
 {
+
+
     public partial class Model
     {
         ///
@@ -19,19 +21,29 @@ public partial class Model
         ///
         ///
         ///
-        ///
         ///
+        ///
         ///
         ///
         ///
+        ///
+        ///
+        ///
+        ///
+        ///
+        ///
+        ///
+        ///
         public ICallback fit(NDArray x, NDArray y,
             int batch_size = -1,
             int epochs = 1,
             int verbose = 1,
             List<ICallback> callbacks = null,
             float validation_split = 0f,
-            (NDArray val_x, NDArray val_y)? validation_data = null,
+            ValidationDataPack validation_data = null,
             bool shuffle = true,
+            Dictionary<int, float> class_weight = null,
+            NDArray sample_weight = null,
             int initial_epoch = 0,
             int max_queue_size = 10,
             int workers = 1,
@@ -43,21 +55,25 @@ public ICallback fit(NDArray x, NDArray y,
                     $"The array x and y should have same value at dim 0, but got {x.dims[0]} and {y.dims[0]}");
             }

-            var train_x = x;
-            var train_y = y;
+            // The default dtype in NDArray is double, so cast sample_weight to float so it can be multiplied with the loss, whose dtype is float.
+            sample_weight = sample_weight?.astype(TF_DataType.TF_FLOAT);

             if (validation_split != 0f && validation_data == null)
             {
-                int train_count = Convert.ToInt32(x.dims[0] * (1 - validation_split));
-                train_x = x[new Slice(0, train_count)];
-                train_y = y[new Slice(0, train_count)];
-                validation_data = (val_x: x[new Slice(train_count)], val_y: y[new Slice(train_count)]);
+                ((x, y, sample_weight), validation_data) = DataAdapter.train_validation_split((x, y, sample_weight), validation_split);
+            }
+
+            // TODO(Wanglongzhi2001)
+            if (class_weight != null)
+            {
+                throw new NotImplementedException("class_weight is not implemented");
             }

             var data_handler = new DataHandler(new DataHandlerArgs
             {
-                X = train_x,
-                Y = train_y,
+                X = x,
+                Y = y,
+                SampleWeight = sample_weight,
                 BatchSize = batch_size,
                 InitialEpoch = initial_epoch,
                 Epochs = epochs,
@@ -73,14 +89,17 @@ public ICallback fit(NDArray x, NDArray y,
                 train_step_func: train_step_function);
         }

+
         public ICallback fit(IEnumerable<NDArray> x, NDArray y,
             int batch_size = -1,
             int epochs = 1,
             int verbose = 1,
             List<ICallback> callbacks = null,
             float validation_split = 0f,
-            (IEnumerable<NDArray> val_x, NDArray val_y)?
validation_data = null, + ValidationDataPack validation_data = null, bool shuffle = true, + Dictionary class_weight = null, + NDArray sample_weight = null, int initial_epoch = 0, int max_queue_size = 10, int workers = 1, @@ -95,27 +114,23 @@ public ICallback fit(IEnumerable x, NDArray y, } } - var train_x = x; - var train_y = y; + sample_weight = sample_weight?.astype(TF_DataType.TF_FLOAT); + if (validation_split != 0f && validation_data == null) { - int train_count = Convert.ToInt32(y.dims[0] * (1 - validation_split)); - train_x = x.Select(x => x[new Slice(0, train_count)] as NDArray); - train_y = y[new Slice(0, train_count)]; - var val_x = x.Select(x => x[new Slice(train_count)] as NDArray); - var val_y = y[new Slice(train_count)]; - validation_data = (val_x, val_y); + ((x, y, sample_weight), validation_data) = DataAdapter.train_validation_split((x, y, sample_weight), validation_split); } var data_handler = new DataHandler(new DataHandlerArgs { - X = new Tensors(train_x.ToArray()), - Y = train_y, + X = new Tensors(x.ToArray()), + Y = y, BatchSize = batch_size, InitialEpoch = initial_epoch, Epochs = epochs, Shuffle = shuffle, + SampleWeight = sample_weight, MaxQueueSize = max_queue_size, Workers = workers, UseMultiprocessing = use_multiprocessing, @@ -142,8 +157,10 @@ public History fit(IDatasetV2 dataset, int verbose = 1, List callbacks = null, IDatasetV2 validation_data = null, - int validation_step = 10, // 间隔多少次会进行一次验证 + int validation_step = 10, bool shuffle = true, + Dictionary class_weight = null, + NDArray sample_weight = null, int initial_epoch = 0, int max_queue_size = 10, int workers = 1, @@ -210,7 +227,7 @@ History FitInternal(DataHandler data_handler, int epochs, int validation_step, i { if (validation_step > 0 && epoch ==0 || (epoch) % validation_step != 0) continue; - + var val_logs = evaluate(validation_data); foreach(var log in val_logs) { @@ -233,7 +250,7 @@ History FitInternal(DataHandler data_handler, int epochs, int validation_step, i return callbacks.History; } - History FitInternal(DataHandler data_handler, int epochs, int verbose, List callbackList, (NDArray, NDArray)? validation_data, + History FitInternal(DataHandler data_handler, int epochs, int verbose, List callbackList, ValidationDataPack validation_data, Func> train_step_func) { stop_training = false; @@ -274,7 +291,8 @@ History FitInternal(DataHandler data_handler, int epochs, int verbose, List callbackList, (IEnumerable, NDArray)? 
validation_data,
-            Func<DataHandler, OwnedIterator, Dictionary<string, float>> train_step_func)
-        {
-            stop_training = false;
-            _train_counter.assign(0);
-            var callbacks = new CallbackList(new CallbackParams
-            {
-                Model = this,
-                Verbose = verbose,
-                Epochs = epochs,
-                Steps = data_handler.Inferredsteps
-            });
-
-            if (callbackList != null)
-            {
-                foreach (var callback in callbackList)
-                    callbacks.callbacks.add(callback);
-            }
-
-            callbacks.on_train_begin();
-
-            foreach (var (epoch, iterator) in data_handler.enumerate_epochs())
-            {
-                reset_metrics();
-                callbacks.on_epoch_begin(epoch);
-                // data_handler.catch_stop_iteration();
-                var logs = new Dictionary<string, float>();
-                long End_step = 0;
-                foreach (var step in data_handler.steps())
-                {
-                    callbacks.on_train_batch_begin(step);
-                    logs = train_step_func(data_handler, iterator);
-                    var end_step = step + data_handler.StepIncrement;
-                    End_step = end_step;
-                    callbacks.on_train_batch_end(end_step, logs);
-                }
-
-                if (validation_data != null)
-                {
-                    var val_logs = evaluate(validation_data.Value.Item1, validation_data.Value.Item2);
-                    foreach (var log in val_logs)
-                    {
-                        logs["val_" + log.Key] = log.Value;
-                        callbacks.on_train_batch_end(End_step, logs);
-                    }
-                }
-
-                callbacks.on_epoch_end(epoch, logs);
-
-                GC.Collect();
-                GC.WaitForPendingFinalizers();
-                if (stop_training)
-                {
-                    break;
-                }
-            }
-
-            return callbacks.History;
-        }
     }
 }
diff --git a/src/TensorFlowNET.Keras/Engine/Model.Train.cs b/src/TensorFlowNET.Keras/Engine/Model.Train.cs
index ad3c70d2d..8f1ec808c 100644
--- a/src/TensorFlowNET.Keras/Engine/Model.Train.cs
+++ b/src/TensorFlowNET.Keras/Engine/Model.Train.cs
@@ -12,7 +12,9 @@ public partial class Model
         Dictionary<string, float> train_step_function(DataHandler data_handler, OwnedIterator iterator)
         {
             var data = iterator.next();
-            var outputs = train_step(data_handler, data[0], data[1]);
+            // dispatch on whether the batch also carries sample_weight
+            var outputs = data.Length == 2 ? train_step(data_handler, data[0], data[1]) :
+                train_step(data_handler, data[0], data[1], data[2]);
             tf_with(ops.control_dependencies(new object[0]), ctl => _train_counter.assign_add(1));
             return outputs;
         }
@@ -21,7 +23,13 @@ Dictionary<string, float> train_step_multi_inputs_function(DataHandler data_hand
         {
             var data = iterator.next();
             var x_size = data_handler.DataAdapter.GetDataset().FirstInputTensorCount;
-            var outputs = train_step(data_handler, new Tensors(data.Take(x_size).ToArray()), new Tensors(data.Skip(x_size).ToArray()));
+            var outputs = data.Length == 2 ?
+ train_step(data_handler, new Tensors(data.Take(x_size).ToArray()), new Tensors(data.Skip(x_size).ToArray())) : + train_step( + data_handler, + new Tensors(data.Take(x_size).ToArray()), + new Tensors(data.Skip(x_size).Take(x_size).ToArray()), + new Tensors(data.Skip(2 * x_size).ToArray())); tf_with(ops.control_dependencies(new object[0]), ctl => _train_counter.assign_add(1)); return outputs; } @@ -61,6 +69,34 @@ Dictionary train_step(DataHandler data_handler, Tensors x, Tensor }); return dict; } + Dictionary train_step(DataHandler data_handler, Tensors x, Tensors y, Tensors sample_weight = null) + { + (x, y, sample_weight) = data_handler.DataAdapter.Expand1d(x, y, sample_weight); + using var tape = tf.GradientTape(); + var y_pred = Apply(x, training: true); + var loss = compiled_loss.Call(y, y_pred, sample_weight:sample_weight); + + // For custom training steps, users can just write: + // trainable_variables = self.trainable_variables + // gradients = tape.gradient(loss, trainable_variables) + // self.optimizer.apply_gradients(zip(gradients, trainable_variables)) + // The _minimize call does a few extra steps unnecessary in most cases, + // such as loss scaling and gradient clipping. + _minimize(tape, optimizer, loss, TrainableVariables); + compiled_metrics.update_state(y, y_pred); + + var dict = new Dictionary(); + metrics.ToList().ForEach(x => + { + var r = x.result(); + if (r.ndim > 0) + { + r = tf.reduce_mean(r); + } + dict[x.Name] = (float)r; + }); + return dict; + } void _minimize(GradientTape tape, IOptimizer optimizer, Tensor loss, List trainable_variables) { diff --git a/test/TensorFlowNET.Keras.UnitTest/Layers/Rnn.Test.cs b/test/TensorFlowNET.Keras.UnitTest/Layers/Rnn.Test.cs index dbf5cae1e..67e2b0464 100644 --- a/test/TensorFlowNET.Keras.UnitTest/Layers/Rnn.Test.cs +++ b/test/TensorFlowNET.Keras.UnitTest/Layers/Rnn.Test.cs @@ -74,8 +74,8 @@ public void TrainLSTMWithMnist() OneHot = true, ValidationSize = 55000, }).Result; - - model.fit(dataset.Train.Data, dataset.Train.Labels, batch_size: 16, epochs: 1); + var sample_weight = np.ones(((int)dataset.Train.Data.shape[0])); + model.fit(dataset.Train.Data, dataset.Train.Labels, batch_size: 16, epochs: 1, sample_weight:sample_weight); } [TestMethod] From 0f02885dfb3647ae1b2bfae51491b4f119da4be9 Mon Sep 17 00:00:00 2001 From: hchen Date: Mon, 2 Oct 2023 18:57:17 -0500 Subject: [PATCH 29/77] Allow Model to cache weights. 
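
With this change, the first load_weights call for a given file reads the HDF5 groups once and memoizes the (variable name, value) pairs; later calls for the same path skip the file entirely and re-apply the cached values through keras.backend.batch_set_value. A minimal usage sketch (the model construction and file name below are illustrative assumptions, not part of this patch):

    // Hypothetical usage: the second call is served from the static
    // weightsCache instead of re-opening the HDF5 file.
    var model = keras.Sequential();
    model.add(keras.layers.Dense(10));
    model.load_weights("weights.h5");  // reads HDF5 and fills the cache
    model.load_weights("weights.h5");  // cache hit: no file I/O

Note that the cache is keyed only by file path, so it assumes the file on disk does not change between calls.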
--- .../Engine/Model.Training.cs | 35 ++++++++++++++++++- src/TensorFlowNET.Keras/Saving/hdf5_format.cs | 4 +-- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/src/TensorFlowNET.Keras/Engine/Model.Training.cs b/src/TensorFlowNET.Keras/Engine/Model.Training.cs index 50d934d9d..457b3d694 100644 --- a/src/TensorFlowNET.Keras/Engine/Model.Training.cs +++ b/src/TensorFlowNET.Keras/Engine/Model.Training.cs @@ -10,8 +10,38 @@ namespace Tensorflow.Keras.Engine { public partial class Model { + static Dictionary> weightsCache + = new Dictionary>(); + public void load_weights(string filepath, bool by_name = false, bool skip_mismatch = false, object options = null) { + // Get from cache + if (weightsCache.ContainsKey(filepath)) + { + var filtered_layers = new List(); + foreach (var layer in Layers) + { + var weights = hdf5_format._legacy_weights(layer); + if (weights.Count > 0) + filtered_layers.append(layer); + } + + var weight_value_tuples = new List<(IVariableV1, NDArray)>(); + filtered_layers.Select((layer, i) => + { + var symbolic_weights = hdf5_format._legacy_weights(layer); + foreach(var weight in symbolic_weights) + { + var weight_value = weightsCache[filepath].First(x => x.Item1 == weight.Name).Item2; + weight_value_tuples.Add((weight, weight_value)); + } + return layer; + }).ToList(); + + keras.backend.batch_set_value(weight_value_tuples); + return; + } + long fileId = Hdf5.OpenFile(filepath, true); if(fileId < 0) { @@ -29,8 +59,11 @@ public void load_weights(string filepath, bool by_name = false, bool skip_mismat throw new NotImplementedException(""); else { - hdf5_format.load_weights_from_hdf5_group(fileId, Layers); + var weight_value_tuples = hdf5_format.load_weights_from_hdf5_group(fileId, Layers); Hdf5.CloseFile(fileId); + + weightsCache[filepath] = weight_value_tuples.Select(x => (x.Item1.Name, x.Item2)).ToList(); + keras.backend.batch_set_value(weight_value_tuples); } } diff --git a/src/TensorFlowNET.Keras/Saving/hdf5_format.cs b/src/TensorFlowNET.Keras/Saving/hdf5_format.cs index bab0efecf..68b73953d 100644 --- a/src/TensorFlowNET.Keras/Saving/hdf5_format.cs +++ b/src/TensorFlowNET.Keras/Saving/hdf5_format.cs @@ -82,7 +82,7 @@ public static void load_optimizer_weights_from_hdf5_group(long filepath = -1, Di } - public static void load_weights_from_hdf5_group(long f, List layers) + public static List<(IVariableV1, NDArray)> load_weights_from_hdf5_group(long f, List layers) { string original_keras_version = "2.5.0"; string original_backend = null; @@ -152,7 +152,7 @@ public static void load_weights_from_hdf5_group(long f, List layers) weight_value_tuples.AddRange(zip(symbolic_weights, weight_values)); } - keras.backend.batch_set_value(weight_value_tuples); + return weight_value_tuples; } public static void toarrayf4(long filepath = -1, Dictionary custom_objects = null, bool compile = false) From a1c64effcfe7976b6cb0f3fbbd268cee203b4874 Mon Sep 17 00:00:00 2001 From: Wanglongzhi2001 <583087864@qq.com> Date: Thu, 5 Oct 2023 20:49:22 +0800 Subject: [PATCH 30/77] feat: add the implementation of class_weight in model.fit --- .../Engine/DataAdapters/DataHandler.cs | 70 ++++++++++++++++++- .../Engine/Model.Evaluate.cs | 13 +++- src/TensorFlowNET.Keras/Engine/Model.Fit.cs | 11 ++- 3 files changed, 84 insertions(+), 10 deletions(-) diff --git a/src/TensorFlowNET.Keras/Engine/DataAdapters/DataHandler.cs b/src/TensorFlowNET.Keras/Engine/DataAdapters/DataHandler.cs index a5ee75c93..a305e5033 100644 --- a/src/TensorFlowNET.Keras/Engine/DataAdapters/DataHandler.cs +++ 
b/src/TensorFlowNET.Keras/Engine/DataAdapters/DataHandler.cs @@ -3,6 +3,8 @@ using Tensorflow.Keras.ArgsDefinition; using static Tensorflow.Binding; using Tensorflow.Keras.Utils; +using Tensorflow.Util; +using Tensorflow.Framework; namespace Tensorflow.Keras.Engine.DataAdapters { @@ -24,6 +26,7 @@ public class DataHandler long _steps_per_execution_value; int _initial_epoch => args.InitialEpoch; int _epochs => args.Epochs; + NDArray _sample_weight => args.SampleWeight; IVariableV1 _steps_per_execution; public DataHandler(DataHandlerArgs args) @@ -75,10 +78,75 @@ public DataHandler(DataHandlerArgs args) } _dataset = _adapter.GetDataset(); - _inferred_steps = _infer_steps(args.StepsPerEpoch, _dataset); _current_step = 0; _step_increment = _steps_per_execution_value - 1; _insufficient_data = false; + _configure_dataset_and_inferred_steps(args.X, args.ClassWeight); + } + + void _configure_dataset_and_inferred_steps(Tensors x, Dictionary class_weight) + { + if (_dataset == null) + { + _dataset = _adapter.GetDataset(); + _inferred_steps = _infer_steps(args.StepsPerEpoch, _dataset); + } + + if (class_weight != null) + { + _dataset = _dataset.map(_make_class_weight_map_fn(class_weight)); + } + _inferred_steps = _infer_steps(args.StepsPerEpoch, _dataset); + } + + + Func _make_class_weight_map_fn(Dictionary class_weight) + { + var class_ids = class_weight.Keys.OrderBy(key => key).ToList(); + var expected_class_ids = range(class_ids[0], class_ids[class_ids.Count - 1] + 1); + if (!class_ids.SequenceEqual(expected_class_ids)) + { + throw new ValueError("Expected `class_weight` to be a dict with keys from 0 to one less "+ + $"than the number of classes, found {class_weight}"); + } + + var class_weight_list = new List(); + foreach (var class_id in class_ids) + { + class_weight_list.Add(class_weight[class_id]); + } + var class_weight_tensor = tf.convert_to_tensor(class_weight_list.ToArray()); + + Func _class_weight_map_fn = (Tensors data) => + { + var x = data[0]; + var y = data[1]; + var sw = _sample_weight == null ? 
null : ops.convert_to_tensor(_sample_weight); + + if (y.shape.rank > 2) + { + throw new ValueError("`class_weight` not supported for 3+ dimensional targets."); + } + + var y_classes = smart_module.smart_cond( + y.shape.rank == 2 && y.shape[1] > 1, + () => math_ops.argmax(y, dimension: 1), + () => math_ops.cast(tf.reshape(y, (-1)), TF_DataType.TF_INT64)); + + var cw = array_ops.gather(class_weight_tensor, y_classes); + if (sw != null) + { + cw = tf.cast(cw, sw.dtype); + cw *= sw; + } + else + { + sw = cw; + } + return new Tensors { x, y, sw }; + }; + + return _class_weight_map_fn; } long _infer_steps(int steps_per_epoch, IDatasetV2 dataset) diff --git a/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs b/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs index 626d7fcad..94a2e6646 100644 --- a/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs +++ b/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs @@ -164,11 +164,20 @@ Dictionary test_step_multi_inputs_function(DataHandler data_handl } - Dictionary test_step(DataHandler data_handler, Tensors x, Tensors y, Tensors sample_weight = null) + Dictionary test_step(DataHandler data_handler, Tensors x, Tensors y) + { + (x,y) = data_handler.DataAdapter.Expand1d(x, y); + var y_pred = Apply(x, training: false); + var loss = compiled_loss.Call(y, y_pred); + compiled_metrics.update_state(y, y_pred); + return metrics.Select(x => (x.Name, x.result())).ToDictionary(x => x.Item1, x => (float)x.Item2); + } + + Dictionary test_step(DataHandler data_handler, Tensors x, Tensors y, Tensors sample_weight) { (x, y, sample_weight) = data_handler.DataAdapter.Expand1d(x, y, sample_weight); var y_pred = Apply(x, training: false); - var loss = compiled_loss.Call(y, y_pred, sample_weight:sample_weight); + var loss = compiled_loss.Call(y, y_pred, sample_weight: sample_weight); compiled_metrics.update_state(y, y_pred); return metrics.Select(x => (x.Name, x.result())).ToDictionary(x => x.Item1, x => (float)x.Item2); } diff --git a/src/TensorFlowNET.Keras/Engine/Model.Fit.cs b/src/TensorFlowNET.Keras/Engine/Model.Fit.cs index 23c53b707..689fc9fb8 100644 --- a/src/TensorFlowNET.Keras/Engine/Model.Fit.cs +++ b/src/TensorFlowNET.Keras/Engine/Model.Fit.cs @@ -63,12 +63,6 @@ public ICallback fit(NDArray x, NDArray y, ((x, y, sample_weight), validation_data) = DataAdapter.train_validation_split((x, y, sample_weight), validation_split); } - // TODO(Wanglongzhi2001) - if (class_weight != null) - { - throw new NotImplementedException("class_weight is not implemented"); - } - var data_handler = new DataHandler(new DataHandlerArgs { X = x, @@ -78,6 +72,7 @@ public ICallback fit(NDArray x, NDArray y, InitialEpoch = initial_epoch, Epochs = epochs, Shuffle = shuffle, + ClassWeight = class_weight, MaxQueueSize = max_queue_size, Workers = workers, UseMultiprocessing = use_multiprocessing, @@ -126,11 +121,12 @@ public ICallback fit(IEnumerable x, NDArray y, { X = new Tensors(x.ToArray()), Y = y, + SampleWeight = sample_weight, BatchSize = batch_size, InitialEpoch = initial_epoch, Epochs = epochs, Shuffle = shuffle, - SampleWeight = sample_weight, + ClassWeight = class_weight, MaxQueueSize = max_queue_size, Workers = workers, UseMultiprocessing = use_multiprocessing, @@ -174,6 +170,7 @@ public History fit(IDatasetV2 dataset, InitialEpoch = initial_epoch, Epochs = epochs, Shuffle = shuffle, + SampleWeight = sample_weight, MaxQueueSize = max_queue_size, Workers = workers, UseMultiprocessing = use_multiprocessing, From ba8f0b084fe30868f091a168d2afa4ff274971d1 Mon Sep 17 00:00:00 2001 From: dogvane 
Date: Sun, 8 Oct 2023 21:45:26 +0800
Subject: [PATCH 31/77] add DepthwiseConv2D (depthwise separable convolution)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../Eager/EagerRunner.RecordGradient.cs       |   5 +
 src/TensorFlowNET.Core/Gradients/nn_grad.cs   |  31 ++++
 .../Keras/Layers/ILayersApi.cs                |  13 ++
 src/TensorFlowNET.Core/Tensors/tensor_util.cs |   5 +-
 .../Layers/Convolution/DepthwiseConv2D.cs     | 167 ++++++++++++++++++
 src/TensorFlowNET.Keras/Layers/LayersApi.cs   |  32 ++++
 .../EagerModeTestBase.cs                      |  34 ++++
 .../Layers/Layers.Convolution.Test.cs         | 125 +++++++++++++
 .../EagerModeTestBase.cs                      |  14 ++
 9 files changed, 425 insertions(+), 1 deletion(-)
 create mode 100644 src/TensorFlowNET.Keras/Layers/Convolution/DepthwiseConv2D.cs

diff --git a/src/TensorFlowNET.Core/Eager/EagerRunner.RecordGradient.cs b/src/TensorFlowNET.Core/Eager/EagerRunner.RecordGradient.cs
index 59d5fd030..2bdd65f5b 100644
--- a/src/TensorFlowNET.Core/Eager/EagerRunner.RecordGradient.cs
+++ b/src/TensorFlowNET.Core/Eager/EagerRunner.RecordGradient.cs
@@ -80,6 +80,11 @@ BackwardFunction GetGradientFunction(string op_name, Tensor[] op_outputs)
             => (out_grads, unneeded_gradients) =>
             {
+                if (!ops.gradientFunctions.ContainsKey(op_name))
+                {
+                    throw new Exception($"No gradient function found for op_name: {op_name}");
+                }
+
                 if (ops.gradientFunctions[op_name] == null)
                     return new Tensor[op_inputs.Length];

diff --git a/src/TensorFlowNET.Core/Gradients/nn_grad.cs b/src/TensorFlowNET.Core/Gradients/nn_grad.cs
index a43a91b9a..87646a9ea 100644
--- a/src/TensorFlowNET.Core/Gradients/nn_grad.cs
+++ b/src/TensorFlowNET.Core/Gradients/nn_grad.cs
@@ -229,6 +229,37 @@ public static Tensor[] _Conv2DGrad(Operation op, Tensor[] grads)
             };
         }

+        ///
+        /// Gradient function for DepthwiseConv2dNative.
+        ///
+        ///
+        ///
+        [RegisterGradient("DepthwiseConv2dNative")]
+        public static Tensor[] _DepthwiseConv2DGrad(Operation op, Tensor[] grads)
+        {
+            var dilations = op.get_attr_list<int>("dilations");
+            var strides = op.get_attr_list<int>("strides");
+            var padding = op.get_attr("padding");
+            var explicit_paddings = op.get_attr_list<int>("explicit_paddings");
+            var data_format = op.get_attr("data_format");
+            var shape = gen_array_ops.shape_n(new Tensor[] { op.inputs[0], op.inputs[1] });
+
+            return new Tensor[]
+            {
+                gen_nn_ops.depthwise_conv2d_native_backprop_input(
+                    shape[0], op.inputs[1], grads[0],
+                    strides, padding, explicit_paddings,
+                    dilations: dilations,
+                    data_format: data_format),
+                gen_nn_ops.depthwise_conv2d_native_backprop_filter(op.inputs[0], shape[1], grads[0],
+                    strides, padding,
+                    dilations: dilations,
+                    explicit_paddings: explicit_paddings,
+                    data_format: data_format)
+            };
+        }
+
         [RegisterGradient("FusedBatchNorm")]
         public static Tensor[] _FusedBatchNormGrad(Operation op, Tensor[] grads)
             => _BaseFusedBatchNormGrad(op, 0, grads);
diff --git a/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs b/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs
index 5e08eadc4..a8141d354 100644
--- a/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs
+++ b/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs
@@ -95,6 +95,19 @@ public ILayer Conv2D(int filters,
             bool use_bias = true,
             string kernel_initializer = "glorot_uniform",
             string bias_initializer = "zeros");
+        public ILayer DepthwiseConv2D(Shape kernel_size = null,
+            Shape strides = null,
+            string padding = "valid",
+            string data_format = null,
+            Shape dilation_rate = null,
+            int groups = 1,
+            int depth_multiplier = 1,
+            string activation = null,
+            bool use_bias = false,
+            string kernel_initializer = "glorot_uniform",
+            string bias_initializer = "zeros",
+            string depthwise_initializer = "glorot_uniform"
+            );

         public ILayer Dense(int units);
         public ILayer Dense(int units,
diff --git a/src/TensorFlowNET.Core/Tensors/tensor_util.cs b/src/TensorFlowNET.Core/Tensors/tensor_util.cs
index e65c4850d..f688d4d5d 100644
--- a/src/TensorFlowNET.Core/Tensors/tensor_util.cs
+++ b/src/TensorFlowNET.Core/Tensors/tensor_util.cs
@@ -249,6 +249,9 @@ public static TensorProto make_tensor_proto(object values, TF_DataType dtype = T
                 case sbyte val:
                     tensor_proto.IntVal.AddRange(new[] { (int)val });
                     break;
+                case byte val:
+                    tensor_proto.IntVal.AddRange(new[] { (int)val });
+                    break;
                 case int val:
                     tensor_proto.IntVal.AddRange(new[] { val });
                     break;
@@ -262,7 +265,7 @@ public static TensorProto make_tensor_proto(object values, TF_DataType dtype = T
                     tensor_proto.DoubleVal.AddRange(new[] { val });
                     break;
                 default:
-                    throw new Exception("make_tensor_proto Not Implemented");
+                    throw new Exception($"make_tensor_proto Not Implemented {values.GetType().Name}");
             }
         }
diff --git a/src/TensorFlowNET.Keras/Layers/Convolution/DepthwiseConv2D.cs b/src/TensorFlowNET.Keras/Layers/Convolution/DepthwiseConv2D.cs
new file mode 100644
index 000000000..dae4a4036
--- /dev/null
+++ b/src/TensorFlowNET.Keras/Layers/Convolution/DepthwiseConv2D.cs
@@ -0,0 +1,167 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+using Tensorflow.Keras.ArgsDefinition;
+using Tensorflow.Keras.Saving;
+using Tensorflow.Common.Types;
+using Tensorflow.Keras.Utils;
+using Tensorflow.Operations;
+using Newtonsoft.Json;
+
+namespace Tensorflow.Keras.Layers
+{
+    public class DepthwiseConv2DArgs : Conv2DArgs
+    {
+        ///
+        /// depth_multiplier: The number of
depthwise convolution output channels for
+        /// each input channel. The total number of depthwise convolution output
+        /// channels will be equal to `filters_in * depth_multiplier`.
+        ///
+        [JsonProperty("depth_multiplier")]
+        public int DepthMultiplier { get; set; } = 1;
+
+        [JsonProperty("depthwise_initializer")]
+        public IInitializer DepthwiseInitializer { get; set; }
+    }
+
+    public class DepthwiseConv2D : Conv2D
+    {
+        ///
+        /// depth_multiplier: The number of depthwise convolution output channels for
+        /// each input channel. The total number of depthwise convolution output
+        /// channels will be equal to `filters_in * depth_multiplier`.
+        ///
+        int DepthMultiplier = 1;
+
+        IInitializer DepthwiseInitializer;
+
+        int[] strides;
+
+        int[] dilation_rate;
+
+        string getDataFormat()
+        {
+            return data_format == "channels_first" ? "NCHW" : "NHWC";
+        }
+
+        static int _id = 1;
+
+        public DepthwiseConv2D(DepthwiseConv2DArgs args) : base(args)
+        {
+            args.Padding = args.Padding.ToUpper();
+
+            if (string.IsNullOrEmpty(args.Name))
+                name = "DepthwiseConv2D_" + _id++;
+
+            this.DepthMultiplier = args.DepthMultiplier;
+            this.DepthwiseInitializer = args.DepthwiseInitializer;
+        }
+
+        public override void build(KerasShapesWrapper input_shape)
+        {
+            //base.build(input_shape);
+
+            var shape = input_shape.ToSingleShape();
+
+            int channel_axis = data_format == "channels_first" ? 1 : -1;
+            var input_channel = channel_axis < 0 ?
+                shape.dims[shape.ndim + channel_axis] :
+                shape.dims[channel_axis];
+
+            var arg = args as DepthwiseConv2DArgs;
+
+            if (arg.Strides.ndim != shape.ndim)
+            {
+                if (arg.Strides.ndim == 2)
+                {
+                    this.strides = new int[] { 1, (int)arg.Strides[0], (int)arg.Strides[1], 1 };
+                }
+                else
+                {
+                    this.strides = conv_utils.normalize_tuple(new int[] { (int)arg.Strides[0] }, shape.ndim, "strides");
+                }
+            }
+            else
+            {
+                this.strides = arg.Strides.dims.Select(o => (int)o).ToArray();
+            }
+
+            if (arg.DilationRate.ndim != shape.ndim)
+            {
+                this.dilation_rate = conv_utils.normalize_tuple(new int[] { (int)arg.DilationRate[0] }, shape.ndim, "dilation_rate");
+            }
+
+            long channel_data = data_format == "channels_first" ? shape[0] : shape[shape.Length - 1];
+
+            var depthwise_kernel_shape = this.kernel_size.dims.concat(new long[] {
+                channel_data,
+                this.DepthMultiplier
+            });
+
+            this.kernel = this.add_weight(
+                shape: depthwise_kernel_shape,
+                initializer: this.DepthwiseInitializer != null ? this.DepthwiseInitializer : this.kernel_initializer,
+                name: "depthwise_kernel",
+                trainable: true,
+                dtype: DType,
+                regularizer: this.kernel_regularizer
+            );
+
+            var axes = new Dictionary<int, int>();
+            axes.Add(-1, (int)input_channel);
+            inputSpec = new InputSpec(min_ndim: rank + 2, axes: axes);
+
+            if (use_bias)
+            {
+                bias = add_weight(name: "bias",
+                    shape: ((int)channel_data),
+                    initializer: bias_initializer,
+                    trainable: true,
+                    dtype: DType);
+            }
+
+            built = true;
+            _buildInputShape = input_shape;
+        }
+
+        protected override Tensors Call(Tensors inputs, Tensors state = null,
+            bool? training = false, IOptionalArgs?
optional_args = null) + { + Tensor outputs = null; + + outputs = gen_nn_ops.depthwise_conv2d_native( + inputs, + filter: this.kernel.AsTensor(), + strides: this.strides, + padding: this.padding, + dilations: this.dilation_rate, + data_format: this.getDataFormat(), + name: name + ); + + if (use_bias) + { + if (data_format == "channels_first") + { + throw new NotImplementedException("call channels_first"); + } + else + { + outputs = gen_nn_ops.bias_add(outputs, ops.convert_to_tensor(bias), + data_format: this.getDataFormat(), name: name); + } + } + + if (activation != null) + outputs = activation.Apply(outputs); + + + return outputs; + } + + } +} \ No newline at end of file diff --git a/src/TensorFlowNET.Keras/Layers/LayersApi.cs b/src/TensorFlowNET.Keras/Layers/LayersApi.cs index 928e7e337..95828fbf7 100644 --- a/src/TensorFlowNET.Keras/Layers/LayersApi.cs +++ b/src/TensorFlowNET.Keras/Layers/LayersApi.cs @@ -210,6 +210,38 @@ public ILayer Conv2D(int filters, Activation = keras.activations.GetActivationFromName(activation) }); + public ILayer DepthwiseConv2D(Shape kernel_size = null, + Shape strides = null, + string padding = "valid", + string data_format = null, + Shape dilation_rate = null, + int groups = 1, + int depth_multiplier = 1, + string activation = null, + bool use_bias = false, + string kernel_initializer = "glorot_uniform", + string bias_initializer = "zeros", + string depthwise_initializer = "glorot_uniform" + ) + => new DepthwiseConv2D(new DepthwiseConv2DArgs + { + Rank = 2, + Filters = 1, + KernelSize = (kernel_size == null) ? (5, 5) : kernel_size, + Strides = strides == null ? (1) : strides, + Padding = padding, + DepthMultiplier = depth_multiplier, + DataFormat = data_format, + DilationRate = dilation_rate == null ? (1) : dilation_rate, + Groups = groups, + UseBias = use_bias, + KernelInitializer = GetInitializerByName(kernel_initializer), + DepthwiseInitializer = GetInitializerByName(depthwise_initializer == null ? kernel_initializer : depthwise_initializer), + BiasInitializer = GetInitializerByName(bias_initializer), + Activation = keras.activations.GetActivationFromName(activation), + }); + + /// /// Transposed convolution layer (sometimes called Deconvolution). 
///
diff --git a/test/TensorFlowNET.Keras.UnitTest/EagerModeTestBase.cs b/test/TensorFlowNET.Keras.UnitTest/EagerModeTestBase.cs
index c7eab364c..635f13a54 100644
--- a/test/TensorFlowNET.Keras.UnitTest/EagerModeTestBase.cs
+++ b/test/TensorFlowNET.Keras.UnitTest/EagerModeTestBase.cs
@@ -33,6 +33,40 @@ public bool Equal(float[] f1, float[] f2)
             return ret;
         }

+        public void AssertArray(int[] f1, int[] f2)
+        {
+            bool ret = false;
+            for (var i = 0; i < f1.Length; i++)
+            {
+                ret = f1[i] == f2[i];
+                if (!ret)
+                    break;
+            }
+
+            if (!ret)
+            {
+                Assert.Fail($"Array not Equal:[{string.Join(",", f1)}] [{string.Join(",", f2)}]");
+            }
+        }
+
+        public void AssertArray(float[] f1, float[] f2)
+        {
+            bool ret = false;
+            var tolerance = .00001f;
+            for (var i = 0; i < f1.Length; i++)
+            {
+                ret = Math.Abs(f1[i] - f2[i]) <= tolerance;
+                if (!ret)
+                    break;
+            }
+
+            if (!ret)
+            {
+                Assert.Fail($"Array float not Equal:[{string.Join(",", f1)}] [{string.Join(",", f2)}]");
+            }
+        }
+
         public bool Equal(double[] d1, double[] d2)
         {
             bool ret = false;
diff --git a/test/TensorFlowNET.Keras.UnitTest/Layers/Layers.Convolution.Test.cs b/test/TensorFlowNET.Keras.UnitTest/Layers/Layers.Convolution.Test.cs
index 997dcb4f6..15c6e80fe 100644
--- a/test/TensorFlowNET.Keras.UnitTest/Layers/Layers.Convolution.Test.cs
+++ b/test/TensorFlowNET.Keras.UnitTest/Layers/Layers.Convolution.Test.cs
@@ -1,6 +1,8 @@
 using Microsoft.VisualStudio.TestTools.UnitTesting;
+using System.Linq;
 using Tensorflow.NumPy;
 using static Tensorflow.KerasApi;
+using static Tensorflow.Binding;

 namespace Tensorflow.Keras.UnitTest.Layers
 {
@@ -193,5 +195,128 @@ public void BasicConv2D_ksize_dilation_same()
             Assert.AreEqual(x.dims[2], y.shape[2]);
             Assert.AreEqual(filters, y.shape[3]);
         }
+
+        [TestMethod]
+        public void BasicDepthwiseConv2D()
+        {
+            var conv = keras.layers.DepthwiseConv2D(kernel_size: 3, strides: 1, activation: null,
+                padding: "same", depthwise_initializer: "ones");
+
+            var x = np.arange(2 * 9 * 9 * 3).reshape((2, 9, 9, 3));
+            var x2 = ops.convert_to_tensor(x, TF_DataType.TF_FLOAT);
+
+            var y = conv.Apply(x2);
+
+            print($"input:{x2.shape} DepthwiseConv2D.out: {y.shape}");
+
+            Assert.AreEqual(4, y.shape.ndim);
+            var arr = y.numpy().reshape((2, 9, 9, 3));
+
+            AssertArray(x[new int[] { 1, 1, 1 }].ToArray(), new int[] { 273, 274, 275 });
+            AssertArray(arr[new int[] { 1, 1, 1 }].ToArray(), new float[] { 2457f, 2466f, 2475f });
+
+            var bn = keras.layers.BatchNormalization();
+            var y2 = bn.Apply(y);
+            arr = y2.numpy().ToArray();
+
+            double delta = 0.0001; // tolerance
+
+            Assert.AreEqual(arr[0], 59.97002f, delta);
+            Assert.AreEqual(arr[1], 63.96802f, delta);
+        }
+
+        [TestMethod]
+        public void BasicDepthwiseConv2D_strides_2()
+        {
+            var conv = keras.layers.DepthwiseConv2D(kernel_size: 3, strides: (1, 2, 2, 1), activation: null,
+                padding: "same", depthwise_initializer: "ones");
+
+            var x = np.arange(2 * 9 * 9 * 3).reshape((2, 9, 9, 3));
+            var x2 = ops.convert_to_tensor(x, TF_DataType.TF_FLOAT);
+
+            var y = conv.Apply(x2);
+
+            print($"input:{x2.shape} DepthwiseConv2D.out: {y.shape}");
+
+            Assert.AreEqual(4, y.shape.ndim);
+            var arr = y.numpy().reshape((2, 5, 5, 3));
+
+            AssertArray(x[new int[] { 1, 1, 1 }].ToArray(), new int[] { 273, 274, 275 });
+            AssertArray(arr[new int[] { 1, 1, 1 }].ToArray(), new float[] { 2727f, 2736f, 2745f });
+
+            var bn = keras.layers.BatchNormalization();
+            var y2 = bn.Apply(y);
+            arr = y2.numpy().ToArray();
+
+            double delta = 0.0001; // tolerance
+
+            Assert.AreEqual(arr[0], 59.97002f, delta);
+            Assert.AreEqual(arr[1], 63.96802f, delta);
+        }
+
+
+        [TestMethod]
+        public void BasicDepthwiseConv2D_strides_3()
+        {
+            var conv = keras.layers.DepthwiseConv2D(kernel_size: 3, strides: 3, activation: null,
+                padding: "same", depthwise_initializer: "ones");
+
+            var x = np.arange(2 * 9 * 9 * 3).reshape((2, 9, 9, 3));
+            var x2 = ops.convert_to_tensor(x, TF_DataType.TF_FLOAT);
+
+            var y = conv.Apply(x2);
+
+            print($"input:{x2.shape} DepthwiseConv2D.out: {y.shape}");
+
+            Assert.AreEqual(4, y.shape.ndim);
+            var arr = y.numpy().reshape((2, 3, 3, 3));
+
+            AssertArray(x[new int[] { 1, 1, 1 }].ToArray(), new int[] { 273, 274, 275 });
+            AssertArray(arr[new int[] { 1, 1, 1 }].ToArray(), new float[] { 3267f, 3276f, 3285f });
+
+            var bn = keras.layers.BatchNormalization();
+            var y2 = bn.Apply(y);
+            arr = y2.numpy().ToArray();
+
+            double delta = 0.0001; // tolerance
+
+            Assert.AreEqual(arr[0], 269.86508f, delta);
+            Assert.AreEqual(arr[1], 278.8606f, delta);
+        }
+
+        [TestMethod]
+        public void BasicDepthwiseConv2D_UseBias()
+        {
+            var conv = keras.layers.DepthwiseConv2D(kernel_size: 3, strides: 1, activation: null,
+                use_bias: true, padding: "same",
+                depthwise_initializer: "ones",
+                bias_initializer: "ones"
+                );
+
+            var weight = conv.get_weights();
+
+            var x = np.arange(9 * 9 * 3).reshape((1, 9, 9, 3));
+            var x2 = ops.convert_to_tensor(x, TF_DataType.TF_FLOAT);
+            var y = conv.Apply(x2);
+
+            Assert.AreEqual(4, y.shape.ndim);
+            var arr = y.numpy().ToArray();
+
+            Assert.AreEqual(arr[0], 61f);
+            Assert.AreEqual(arr[1], 65f);
+
+            var bn = keras.layers.BatchNormalization();
+            var y2 = bn.Apply(y);
+            arr = y2.numpy().ToArray();
+
+            double delta = 0.0001; // tolerance
+
+            Assert.AreEqual(arr[0], 60.96952f, delta);
+            Assert.AreEqual(arr[1], 64.96752f, delta);
+        }
     }
 }
diff --git a/test/TensorFlowNET.UnitTest/EagerModeTestBase.cs b/test/TensorFlowNET.UnitTest/EagerModeTestBase.cs
index d08f4e505..b7b9ae128 100644
--- a/test/TensorFlowNET.UnitTest/EagerModeTestBase.cs
+++ b/test/TensorFlowNET.UnitTest/EagerModeTestBase.cs
@@ -20,6 +20,20 @@ public bool Equal(float f1, float f2)
             return Math.Abs(f1 - f2) <= tolerance;
         }

+        public bool Equal(long[] l1, long[] l2)
+        {
+            if (l1.Length != l2.Length)
+                return false;
+
+            for (var i = 0; i < l1.Length; i++)
+            {
+                if (l1[i] != l2[i])
+                    return false;
+            }
+
+            return true;
+        }
+
         public bool Equal(float[] f1, float[] f2)
         {
             bool ret = false;

From 5e4f53077f94ddf8513dd925f18eeb05b81a9482 Mon Sep 17 00:00:00 2001
From: dogvane
Date: Sun, 8 Oct 2023 21:52:55 +0800
Subject: [PATCH 32/77] Fix image left-right and up-down flipping and add
 corresponding test cases
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 data/img001.bmp                               | Bin 0 -> 178662 bytes
 src/TensorFlowNET.Core/APIs/tf.image.cs       |   7 +
 src/TensorFlowNET.Core/APIs/tf.io.cs          |   7 +
 .../Keras/Layers/ILayersApi.cs                |   6 +
 .../Operations/image_ops_impl.cs              |  43 ++-
 src/TensorFlowNET.Keras/Layers/LayersApi.cs   |  23 +-
 .../TensorFlowNET.Graph.UnitTest/ImageTest.cs |  90 +++++
 .../ManagedAPI/ArrayOpsTest.cs                | 317 ++++++++++++++++++
 .../TensorFlowNET.UnitTest/NumPy/ShapeTest.cs |  44 +++
 9 files changed, 525 insertions(+), 12 deletions(-)
 create mode 100644 data/img001.bmp
 create mode 100644 test/TensorFlowNET.UnitTest/NumPy/ShapeTest.cs

diff --git a/data/img001.bmp b/data/img001.bmp
new file mode 100644
index
0000000000000000000000000000000000000000..d149d76f1ac11b4f5f6700560f9bba04868f8ab4 GIT binary patch literal 178662 zcmeI5G14wM4MiWUf?&c4SOE(lBVd!j0!Y~qMKT#V2t4KLvfOT2mSnm6zIrp&-Jjdm zJ@@?Io0+1DKmPfj|M=~X|NZ&{{q=kL>)*fr^_w5RqpKf3{{HLd|G)Y5Z~wtB5C8!X z009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH z0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI z5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X z009sH0T2Lz&k(r&|61buBLx2SmmlSK$@^B>Khh>*RsvP*syE!OaBy)hfkte)rC)IS z+(I#VA;86k+i>Fr1n$J9Xdn6BerwE+=NgC&w?FR2(ecM(!|kKX{qpdSV#Dn(>v3-O zuGkcZe7^oVH{f{kYeu>rFUF?_5x5hZB7CIukHkF4Nt00rV#93{!q{XBfw9FtGIoe3AQ2mG$M=F07YPW(hTDsr@p>=;x!7&*!`i4C{So5F@u1pLH?+fzkgu`K~t zvEjCDS=dcZz*}s%E$;#=4G9E@4Yv*Z!d5~8F=E4QVI5dzMIcOUxNTJ#_K^^X6dP_! z8o?S90>NU#ZIjlp$s>VyvElY(DOj*hV2jvrd%d{KJN*39FOuJ`5q60Ux39JS?N5Kj z?{^R=wSMG(*!&pt9i1XYC4lek`n_G5;Nl$w;5OWThsitz+^%lZ1?Ph&hY~2YKK_H~ zwf%FiB{6xAdGbpBa|f+48F=NaE&o;It~g2QCdBq`B8+)QY2)w`)11beJvx=x#6(!S z6TPr)>1o@9({V#95&M;^BaM)9!&o($&MA~r=HmnHu~3!vwB#8k{NF~1?N zJKu?PU9;ty50+DGf-_b67MLc7&cN5V(QN54=*&-< zjoEOcTl0~+8&+qUi|*pR4{5?YVs{IgEqyOf_34V3@p?Bt*IRXJ*B85DmziD9rOk!a zC`$L9++%-5Od&w4X{*W2h1LGH`wYYm&|6rIqI6qIF}vwv3K_0PXnYRPV`b(?v0dkv zm!d0uh0K>br@tmc>eVE-LZ2eg5G6s)BPpl@KEn=l~tC<7#S&Z3GwHlb4V70UPJ`%CZ zp?;xeTUadJoT1Z(i!mFjbh|#Mp9AZ&H9N?~E-!jSQ&g~6dIKTt?RrcF+cAwsfbrs8 zpLVULhmZ3#>FD%h+PJ-imAqKQo0FD*G;TqsyScNS(Sd>)Ao8@Y?{X;<_d7D(rwwA z?`9Qq1$7B!(zrRF>xNyiuM6bY>tR=bTa_-)yJe+e%oW;{y{UT(jlrRYVwY1^@-Ct_ zEPX(1`$x7h8|LzKP0@5Ke>Y&Ix!5%GN@O+X!pgSv0RFloY-2Xe=~g&ZQ?&4{m}MSf zm&tXh)sQPL+tTZ{*)Ggu>d5O_4uI(=HVxmTrrXCvI=s!c=A}3LC%H9`serGjIEJ39 z*d;=f{V|sUFW=L=bbdsJCmu0p`Oo$n!!1B;3Nn>_7VI4;kJ9hX+W6cl=2Qz){l>70 z5u38evri!%Q*tVOd}#cqPBG<0H09E5JcQ=NFtN+I4S{BudMig-U%9(*D*f)bUC;eu z3cCDf(jUk0UJw)WuBL9M z-jVD6NyMgkx*YDH)%n#IF{P`=vwATsX1A$sr&iI%`{WUu=Hwi@xWk)>bH z4ZaSJ$y>YD8jwAq*p&JTfg>`<5?s3Z{K5@`WAfszwF+leFUu)5#koS@h}3!SUJNe% zZVd0|@iBRKwOQA&X~m}0l>mq1!6d$Pj}(u4Zi&eoEag5)iBjXWb69JCJg9h!nrF5q(gKc-k9<)zddg&_yN)yC5KFn&+=#^jw^ z>3xdW)GUYKS9jTYZ|UCY>i&CU@>)&YK1l5NFKw!RZ|T)N?~Cm*x!#MB4-%V_n#Y&W z+g`e4$n`ZDVjA`fu_a1mC|xq=;+hOGt=v9KY%0zw_^bmNN}rusu|7%6^_C&Fy!|Al z%Lmr2OcIm-E7eq-Qpg#>4WTticN{8Vh#wXKT1`)I>qCQ zsbb2%y5A&r@Rv3zk*f4ZMb!(k#gym`vDI~#7qgYV80YTwgfYi|WOv2BOPB5WI9k}W z2}}3Lo-pR%tNUeQ$N$qvQ7B>QA90&va=q!{(PGp5&6oBrv1t;xO%OKzYD}P8?)Jv< zW{>7^W_K`L54R0rDz@TZ-HqOgJ$Jpr#oqX(O%UMrE)(jxT#5SCUDxEl=>CxI9UE@% zYZUQ;S34Irpe*9us*+K@rAu_3T9YtlBX(Es>QA|titM`hI#}~3j_bKon-zrJWN3qM~+?htI=9cF* zdD5KH-0~q!4mI2)N$G}(x?5$4DHOZR&&_Gn>fUy<&pzz#+&tvt&7KmlK11nWh&d%T z0qM%q2=;g@1pBy8v*2z#;c;Qh;C2A&S+UDVUA^g}#uThd|Fq@`Einw_*+|^z->S5i(;3-rd%@d zd;nx>n87|VAHi)W&L(2hY%4kmRhJW38Ag7J)k?T+Me@3jmDpv^Bt|6}Ix+o>#fi=DQ;JI?Pp+?KOVC3YFx+(YvX<#^3k!fgrC zuGREV@~-O|a}~`ubd5jW9Bz;K(5Gp|E|VMDXt>oQ)qn+Xdki_b*oJz)Ol%?pw+C)_ z#U8BR^^u8|CY$Uc&^;G!cd^jJQ^YRwFPgdd`W?f)&*65L|5;*p_3q}nd)T;rx9qIv zaGUpaqS%z8@snnQYb7{ywVU$dkeR()f_E01yP8qc;!kg z^EKSA?VK)l2{PsC)Zz;2lnc0BntF%WQ+mH!Y(fFI3BsGirZ_@rh0Ds%%ZY!7+b)m5SzO0YTSLu#dyJ!54!ykv5TDC9KMV2HMc({_M_f+R=nf+ zr`+y}-4J-+ZSzHay?tNUL85p3!15Fn`I};wuCG4V>wyNZ{D@oejwfBWU)kVYsH$|i zxLfw(=kG9?r=ZH3@9h_t*o_9axfSntQtGaHciVsc!>ae|lXe5X!(^lY0T2KI5C8!X z009sH0T2KI5C8!X009sH0T2KI5C8!X009sH0T2KI5C8!X009svKkpSF2u{F#r sApo}}l(6PR0&x4p)-bb#0Nj>P!kQBa!0i)T!^{!_a9ctNYfdEaUtXM_NB{r; literal 0 HcmV?d00001 diff --git a/src/TensorFlowNET.Core/APIs/tf.image.cs b/src/TensorFlowNET.Core/APIs/tf.image.cs index ac9cbc60d..41ef52967 100644 --- a/src/TensorFlowNET.Core/APIs/tf.image.cs +++ b/src/TensorFlowNET.Core/APIs/tf.image.cs @@ -339,6 +339,13 @@ public Tensor decode_image(Tensor contents, 
int channels = 0, TF_DataType dtype
            => image_ops_impl.decode_image(contents, channels: channels, dtype: dtype, name: name, expand_animations: expand_animations);

+        public Tensor encode_png(Tensor contents, string name = null)
+            => image_ops_impl.encode_png(contents, name: name);
+
+        public Tensor encode_jpeg(Tensor contents, string name = null)
+            => image_ops_impl.encode_jpeg(contents, name: name);
+
+
         ///
         /// Convenience function to check if the 'contents' encodes a JPEG image.
         ///
diff --git a/src/TensorFlowNET.Core/APIs/tf.io.cs b/src/TensorFlowNET.Core/APIs/tf.io.cs
index be1e86e6c..ea1e44b28 100644
--- a/src/TensorFlowNET.Core/APIs/tf.io.cs
+++ b/src/TensorFlowNET.Core/APIs/tf.io.cs
@@ -16,6 +16,7 @@ limitations under the License.

 using System.Collections.Generic;
 using Tensorflow.IO;
+using Tensorflow.Operations;

 namespace Tensorflow
 {
@@ -46,6 +47,12 @@ public Operation save_v2(Tensor prefix, string[] tensor_names,
         public Tensor[] restore_v2(Tensor prefix, string[] tensor_names,
             string[] shape_and_slices, TF_DataType[] dtypes, string name = null)
             => ops.restore_v2(prefix, tensor_names, shape_and_slices, dtypes, name: name);
+
+        public Operation write_file(string filename, Tensor contents, string name = null)
+            => write_file(Tensorflow.ops.convert_to_tensor(filename, TF_DataType.TF_STRING), contents, name);
+
+        public Operation write_file(Tensor filename, Tensor contents, string name = null)
+            => gen_ops.write_file(filename, contents, name);
     }

     public GFile gfile = new GFile();
diff --git a/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs b/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs
index a8141d354..3fd98e7a8 100644
--- a/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs
+++ b/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs
@@ -55,6 +55,12 @@ public ILayer Conv1D(int filters,
             string kernel_initializer = "glorot_uniform",
             string bias_initializer = "zeros");

+        public ILayer Conv2D(int filters,
+            Shape kernel_size = null,
+            Shape strides = null,
+            string padding = "valid"
+            );
+
         public ILayer Conv2D(int filters,
             Shape kernel_size = null,
             Shape strides = null,
diff --git a/src/TensorFlowNET.Core/Operations/image_ops_impl.cs b/src/TensorFlowNET.Core/Operations/image_ops_impl.cs
index 318b8b142..f1aff28ee 100644
--- a/src/TensorFlowNET.Core/Operations/image_ops_impl.cs
+++ b/src/TensorFlowNET.Core/Operations/image_ops_impl.cs
@@ -102,7 +102,10 @@ internal static Operation[] _CheckAtLeast3DImage(Tensor image, bool require_stat
             {
                 throw new ValueError("\'image\' must be fully defined.");
             }
-            var dims = image_shape["-3:"];
+            var dims = new Shape(new[] {
+                image_shape.dims[image_shape.dims.Length - 3],
+                image_shape.dims[image_shape.dims.Length - 2],
+                image_shape.dims[image_shape.dims.Length - 1]});
             foreach (var dim in dims.dims)
             {
                 if (dim == 0)
@@ -112,16 +115,18 @@ internal static Operation[] _CheckAtLeast3DImage(Tensor image, bool require_stat
             }

             var image_shape_last_three_elements = new Shape(new[] {
-                image_shape.dims[image_shape.dims.Length - 1],
+                image_shape.dims[image_shape.dims.Length - 3],
                 image_shape.dims[image_shape.dims.Length - 2],
-                image_shape.dims[image_shape.dims.Length - 3]});
+                image_shape.dims[image_shape.dims.Length - 1]});
             if (!image_shape_last_three_elements.IsFullyDefined)
             {
                 Tensor image_shape_ = array_ops.shape(image);
-                var image_shape_return = tf.constant(new[] {
-                    image_shape_.dims[image_shape.dims.Length - 1],
-                    image_shape_.dims[image_shape.dims.Length - 2],
-                    image_shape_.dims[image_shape.dims.Length - 3]});
+                var image_shape_return =
tf.slice(image_shape_, new[] { Math.Max(image_shape.dims.Length - 3, 0) }, new[] { 3 }); + + //var image_shape_return = tf.constant(new[] { + // image_shape_.dims[image_shape_.dims.Length - 3], + // image_shape_.dims[image_shape_.dims.Length - 2], + // image_shape_.dims[image_shape_.dims.Length - 1]}); return new Operation[] { check_ops.assert_positive( @@ -209,10 +214,10 @@ internal static Tensor _random_flip(Tensor image, int flip_index, int seed, stri } public static Tensor flip_left_right(Tensor image) - => _flip(image, 0, "flip_left_right"); + => _flip(image, 1, "flip_left_right"); public static Tensor flip_up_down(Tensor image) - => _flip(image, 1, "flip_up_down"); + => _flip(image, 0, "flip_up_down"); internal static Tensor _flip(Tensor image, int flip_index, string scope_name) { @@ -223,11 +228,11 @@ internal static Tensor _flip(Tensor image, int flip_index, string scope_name) Shape shape = image.shape; if (shape.ndim == 3 || shape.ndim == Unknown) { - return fix_image_flip_shape(image, gen_array_ops.reverse(image, ops.convert_to_tensor(new int[] { flip_index }))); + return fix_image_flip_shape(image, gen_array_ops.reverse_v2(image, ops.convert_to_tensor(new int[] { flip_index }))); } else if (shape.ndim == 4) { - return gen_array_ops.reverse_v2(image, ops.convert_to_tensor(new[] { (flip_index + 1) % 2 })); + return gen_array_ops.reverse_v2(image, ops.convert_to_tensor(new[] { flip_index + 1 })); } else { @@ -2047,6 +2052,22 @@ internal static (Tensor, Tensor) non_max_suppression_padded_v1(Tensor boxes, Ten }); } + public static Tensor encode_jpeg(Tensor contents, string name = null) + { + return tf_with(ops.name_scope(name, "encode_jpeg"), scope => + { + return gen_ops.encode_jpeg(contents, name:name); + }); + } + + public static Tensor encode_png(Tensor contents, string name = null) + { + return tf_with(ops.name_scope(name, "encode_png"), scope => + { + return gen_ops.encode_png(contents, name: name); + }); + } + public static Tensor is_jpeg(Tensor contents, string name = null) { return tf_with(ops.name_scope(name, "is_jpeg"), scope => diff --git a/src/TensorFlowNET.Keras/Layers/LayersApi.cs b/src/TensorFlowNET.Keras/Layers/LayersApi.cs index 95828fbf7..bcc19dc22 100644 --- a/src/TensorFlowNET.Keras/Layers/LayersApi.cs +++ b/src/TensorFlowNET.Keras/Layers/LayersApi.cs @@ -112,7 +112,28 @@ public ILayer Conv1D(int filters, KernelInitializer = GetInitializerByName(kernel_initializer), BiasInitializer = GetInitializerByName(bias_initializer) }); - + public ILayer Conv2D(int filters, + Shape kernel_size = null, + Shape strides = null, + string padding = "valid") + => new Conv2D(new Conv2DArgs + { + Rank = 2, + Filters = filters, + KernelSize = (kernel_size == null) ? (5, 5) : kernel_size, + Strides = strides == null ? (1, 1) : strides, + Padding = padding, + DataFormat = null, + DilationRate = (1, 1), + Groups = 1, + UseBias = false, + KernelRegularizer = null, + KernelInitializer =tf.glorot_uniform_initializer, + BiasInitializer = tf.zeros_initializer, + BiasRegularizer = null, + ActivityRegularizer = null, + Activation = keras.activations.Linear, + }); /// /// 2D convolution layer (e.g. spatial convolution over images). /// This layer creates a convolution kernel that is convolved with the layer input to produce a tensor of outputs. 
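
Taken together, the corrected flip axes and the new encode/write ops above form one round trip: decode an image, flip it, encode it, and write it back out. A minimal sketch using only the APIs this patch touches (the file paths are placeholders, not files shipped with the repo):

    // read -> decode -> flip -> encode -> write, in graph mode
    var contents = tf.io.read_file("input.bmp");        // placeholder path
    var img = tf.image.decode_image(contents);
    var flipped = tf.image.flip_left_right(img);        // now reverses the width axis (axis 1 of an HWC image)
    var png = tf.image.encode_png(flipped);
    var write = tf.io.write_file("input_lr.png", png);  // placeholder path
    // running `write` in a session materializes the file, as the tests below do

The unit tests that follow exercise exactly this path against data/img001.bmp.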
diff --git a/test/TensorFlowNET.Graph.UnitTest/ImageTest.cs b/test/TensorFlowNET.Graph.UnitTest/ImageTest.cs
index d671b6096..127b65bf6 100644
--- a/test/TensorFlowNET.Graph.UnitTest/ImageTest.cs
+++ b/test/TensorFlowNET.Graph.UnitTest/ImageTest.cs
@@ -4,6 +4,7 @@
 using Tensorflow;
 using static Tensorflow.Binding;
 using System;
+using System.IO;

 namespace TensorFlowNET.UnitTest
 {
@@ -164,5 +165,94 @@ public void TestCropAndResize()
             Assert.AreEqual(result.size, 16ul);
             Assert.AreEqual(result[0, 0, 0, 0], 12f);
         }
+
+        [TestMethod]
+        public void ImageSaveTest()
+        {
+            var imgPath = TestHelper.GetFullPathFromDataDir("img001.bmp");
+            var jpegImgPath = TestHelper.GetFullPathFromDataDir("img001.jpeg");
+            var pngImgPath = TestHelper.GetFullPathFromDataDir("img001.png");
+
+            File.Delete(jpegImgPath);
+            File.Delete(pngImgPath);
+
+            var contents = tf.io.read_file(imgPath);
+            var bmp = tf.image.decode_image(contents);
+            Assert.AreEqual(bmp.name, "decode_image/DecodeImage:0");
+
+            var jpeg = tf.image.encode_jpeg(bmp);
+            var op1 = tf.io.write_file(jpegImgPath, jpeg);
+
+            var png = tf.image.encode_png(bmp);
+            var op2 = tf.io.write_file(pngImgPath, png);
+
+            this.session().run(op1);
+            this.session().run(op2);
+
+            Assert.IsTrue(File.Exists(jpegImgPath), "file not found: " + jpegImgPath);
+            Assert.IsTrue(File.Exists(pngImgPath), "file not found: " + pngImgPath);
+
+            // To verify the image contents manually, comment out the two lines below.
+            File.Delete(jpegImgPath);
+            File.Delete(pngImgPath);
+        }
+
+        [TestMethod]
+        public void ImageFlipTest()
+        {
+            var imgPath = TestHelper.GetFullPathFromDataDir("img001.bmp");
+
+            var contents = tf.io.read_file(imgPath);
+            var bmp = tf.image.decode_image(contents);
+
+            // flip left-right
+            var lrImgPath = TestHelper.GetFullPathFromDataDir("img001_lr.png");
+            File.Delete(lrImgPath);
+
+            var lr = tf.image.flip_left_right(bmp);
+            var png = tf.image.encode_png(lr);
+            var op = tf.io.write_file(lrImgPath, png);
+            this.session().run(op);
+
+            Assert.IsTrue(File.Exists(lrImgPath), "file not found: " + lrImgPath);
+
+            // flip up-down
+            var updownImgPath = TestHelper.GetFullPathFromDataDir("img001_updown.png");
+            File.Delete(updownImgPath);
+
+            var updown = tf.image.flip_up_down(bmp);
+            var pngupdown = tf.image.encode_png(updown);
+            var op2 = tf.io.write_file(updownImgPath, pngupdown);
+            this.session().run(op2);
+            Assert.IsTrue(File.Exists(updownImgPath));
+
+            // For now the flipped images are verified by eye; remove the two File.Delete calls below while inspecting them.
+            File.Delete(lrImgPath);
+            File.Delete(updownImgPath);
+
+            // flip a batch of images
+            // The shape is currently taken straight from bmp; the batch here is built with the default image size.
+            var mImg = tf.stack(new[] { bmp, lr }, axis: 0);
+            print(mImg.shape);
+
+            var up2 = tf.image.flip_up_down(mImg);
+
+            var updownImgPath_m1 = TestHelper.GetFullPathFromDataDir("img001_m_ud.png"); // flipped up-down directly
+            File.Delete(updownImgPath_m1);
+
+            var img001_updown_m2 = TestHelper.GetFullPathFromDataDir("img001_m_lr_ud.png"); // flipped left-right first, then up-down
+            File.Delete(img001_updown_m2);
+
+            var png2 = tf.image.encode_png(up2[0]);
+            tf.io.write_file(updownImgPath_m1, png2);
+
+            png2 = tf.image.encode_png(up2[1]);
+            tf.io.write_file(img001_updown_m2, png2);
+
+            // To verify the image contents manually, comment out the two lines below.
+            File.Delete(updownImgPath_m1);
+            File.Delete(img001_updown_m2);
+        }
     }
 }
diff --git a/test/TensorFlowNET.UnitTest/ManagedAPI/ArrayOpsTest.cs b/test/TensorFlowNET.UnitTest/ManagedAPI/ArrayOpsTest.cs
index 675689bb1..e25c9779d 100644
--- a/test/TensorFlowNET.UnitTest/ManagedAPI/ArrayOpsTest.cs
+++ b/test/TensorFlowNET.UnitTest/ManagedAPI/ArrayOpsTest.cs
@@ -3,6 +3,7 @@
 using Tensorflow;
 using static Tensorflow.Binding;
 using System.Linq;
+using Tensorflow.Operations;

 namespace
TensorFlowNET.UnitTest.ManagedAPI { @@ -105,5 +106,321 @@ public void ReverseArray() Assert.IsTrue(Equal(a[0].ToArray().Reverse().ToArray(), b[0].ToArray())); Assert.IsTrue(Equal(a[1].ToArray().Reverse().ToArray(), b[1].ToArray())); } + + [TestMethod] + public void ReverseImgArray3D() + { + // 创建 sourceImg 数组 + var sourceImgArray = new float[,,] { + { + { 237, 28, 36 }, + { 255, 255, 255 }, + { 255, 255, 255 } + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + } + }; + var sourceImg = ops.convert_to_tensor(sourceImgArray); + + // 创建 lrImg 数组 + var lrImgArray = new float[,,] { + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 237, 28, 36 } + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + } + }; + var lrImg = ops.convert_to_tensor(lrImgArray); + + var lr = tf.image.flip_left_right(sourceImg); + Assert.IsTrue(Equal(lrImg.numpy().ToArray(), lr.numpy().ToArray()), "tf.image.flip_left_right fail."); + + var lr2 = tf.reverse(sourceImg, 1); + Assert.IsTrue(Equal(lrImg.numpy().ToArray(), lr2.numpy().ToArray()), "tf.reverse (axis=1) fail."); + + var lr3 = gen_array_ops.reverse_v2(sourceImg, ops.convert_to_tensor(new[] { 1 })); + Assert.IsTrue(Equal(lrImg.numpy().ToArray(), lr3.numpy().ToArray()), "gen_array_ops.reverse_v2 axis=1 fail."); + + // 创建 udImg 数组 + var udImgArray = new float[,,] { + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + }, + { + { 237, 28, 36 }, + { 255, 255, 255 }, + { 255, 255, 255 } + } + }; + var udImg = ops.convert_to_tensor(udImgArray); + + var ud = tf.image.flip_up_down(sourceImg); + Assert.IsTrue(Equal(udImg.numpy().ToArray(), ud.numpy().ToArray()), "tf.image.flip_up_down fail."); + + var ud2 = tf.reverse(sourceImg, new Axis(0)); + Assert.IsTrue(Equal(udImg.numpy().ToArray(), ud2.numpy().ToArray()), "tf.reverse (axis=0) fail."); + + var ud3 = gen_array_ops.reverse_v2(sourceImg, ops.convert_to_tensor(new[] { 0 })); + Assert.IsTrue(Equal(udImg.numpy().ToArray(), ud3.numpy().ToArray()), "gen_array_ops.reverse_v2 axis=0 fail."); + } + + [TestMethod] + public void ReverseImgArray4D() + { + // 原图左上角,加一张左右翻转后的图片 + var m = new float[,,,] { + { + { + { 237, 28, 36 }, + { 255, 255, 255 }, + { 255, 255, 255 } + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + } + }, + { + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 237, 28, 36 } + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + } + } + }; + var sourceImg = ops.convert_to_tensor(m); + + var lrArray = new float[,,,] { + { + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 237, 28, 36 }, + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + } + }, + { + { + { 237, 28, 36 }, + { 255, 255, 255 }, + { 255, 255, 255 }, + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + } + } + }; + var lrImg = ops.convert_to_tensor(lrArray); + + // 创建 ud 数组 + var udArray = new float[,,,] { + { + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + }, + { + { 237, 28, 36 }, + { 255, 255, 255 }, + { 255, 255, 255 } + } + }, + { + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 237, 28, 36 } + } + } + }; + var udImg = ops.convert_to_tensor(udArray); + + var ud3 = gen_array_ops.reverse_v2(sourceImg, ops.convert_to_tensor(new[] { 1 })); + Assert.IsTrue(Equal(udImg.numpy().ToArray(), ud3.numpy().ToArray()), "gen_array_ops.reverse_v2 axis=1 fail."); + + var ud2 = tf.reverse(sourceImg, new Axis(1)); + 
Assert.IsTrue(Equal(udImg.numpy().ToArray(), ud2.numpy().ToArray()), "tf.reverse (axis=1) fail."); + + var ud = tf.image.flip_up_down(sourceImg); + Assert.IsTrue(Equal(udImg.numpy().ToArray(), ud.numpy().ToArray()), "tf.image.flip_up_down fail."); + + // 左右翻转 + var lr = tf.image.flip_left_right(sourceImg); + Assert.IsTrue(Equal(lrImg.numpy().ToArray(), lr.numpy().ToArray()), "tf.image.flip_left_right fail."); + + var lr2 = tf.reverse(sourceImg, 0); + Assert.IsTrue(Equal(lrImg.numpy().ToArray(), lr2.numpy().ToArray()), "tf.reverse (axis=1) fail."); + + var lr3 = gen_array_ops.reverse_v2(sourceImg, ops.convert_to_tensor(new[] { 0 })); + Assert.IsTrue(Equal(lrImg.numpy().ToArray(), lr3.numpy().ToArray()), "gen_array_ops.reverse_v2 axis=1 fail."); + + } + + [TestMethod] + public void ReverseImgArray4D_3x3() + { + // 原图左上角,加一张左右翻转后的图片 + var m = new float[,,,] { + { + { + { 237, 28, 36 }, + { 255, 255, 255 }, + { 255, 255, 255 } + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + } + }, + { + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 237, 28, 36 } + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + } + } + }; + var sourceImg = ops.convert_to_tensor(m); + + var lrArray = new float[,,,] { + { + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 237, 28, 36 }, + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + } + }, + { + { + { 237, 28, 36 }, + { 255, 255, 255 }, + { 255, 255, 255 }, + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + } + } + }; + var lrImg = ops.convert_to_tensor(lrArray); + + // 创建 ud 数组 + var udArray = new float[,,,] { + { + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + }, + { + { 237, 28, 36 }, + { 255, 255, 255 }, + { 255, 255, 255 } + } + }, + { { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 255, 255, 255 } + }, + { + { 255, 255, 255 }, + { 255, 255, 255 }, + { 237, 28, 36 } + } + } + }; + var udImg = ops.convert_to_tensor(udArray); + + var ud3 = gen_array_ops.reverse_v2(sourceImg, ops.convert_to_tensor(new[] { 1 })); + Assert.IsTrue(Equal(udImg.numpy().ToArray(), ud3.numpy().ToArray()), "gen_array_ops.reverse_v2 axis=1 fail."); + + var ud2 = tf.reverse(sourceImg, new Axis(1)); + Assert.IsTrue(Equal(udImg.numpy().ToArray(), ud2.numpy().ToArray()), "tf.reverse (axis=1) fail."); + + var ud = tf.image.flip_up_down(sourceImg); + Assert.IsTrue(Equal(udImg.numpy().ToArray(), ud.numpy().ToArray()), "tf.image.flip_up_down fail."); + + // 左右翻转 + var lr = tf.image.flip_left_right(sourceImg); + Assert.IsTrue(Equal(lrImg.numpy().ToArray(), lr.numpy().ToArray()), "tf.image.flip_left_right fail."); + + var lr2 = tf.reverse(sourceImg, 0); + Assert.IsTrue(Equal(lrImg.numpy().ToArray(), lr2.numpy().ToArray()), "tf.reverse (axis=1) fail."); + + var lr3 = gen_array_ops.reverse_v2(sourceImg, ops.convert_to_tensor(new[] { 0 })); + Assert.IsTrue(Equal(lrImg.numpy().ToArray(), lr3.numpy().ToArray()), "gen_array_ops.reverse_v2 axis=1 fail."); + + } } } diff --git a/test/TensorFlowNET.UnitTest/NumPy/ShapeTest.cs b/test/TensorFlowNET.UnitTest/NumPy/ShapeTest.cs new file mode 
100644
index 000000000..f5a8685be
--- /dev/null
+++ b/test/TensorFlowNET.UnitTest/NumPy/ShapeTest.cs
@@ -0,0 +1,44 @@
+using Microsoft.VisualStudio.TestTools.UnitTesting;
+using Tensorflow.NumPy;
+using System;
+using System.Linq;
+using static Tensorflow.Binding;
+using Tensorflow;
+
+namespace TensorFlowNET.UnitTest.NumPy
+{
+    [TestClass]
+    public class ShapeTest : EagerModeTestBase
+    {
+        [Ignore]
+        [TestMethod]
+        public unsafe void ShapeGetLastElements()
+        {
+            // test code from function _CheckAtLeast3DImage
+            // The earlier _CheckAtLeast3DImage had a bug; it now passes, and the code below is correct.
+            // todo: the shape["-3:"] slicing form still has a bug to fix; re-enable this unit test once it is fixed. Ignored for now.
+
+            var image_shape = new Shape(new[] { 32, 64, 3 });
+            var image_shape_4d = new Shape(new[] { 4, 64, 32, 3 });
+
+            var image_shape_last_three_elements = new Shape(new[] {
+                image_shape.dims[image_shape.dims.Length - 3],
+                image_shape.dims[image_shape.dims.Length - 2],
+                image_shape.dims[image_shape.dims.Length - 1]});
+
+            var image_shape_last_three_elements2 = image_shape["-3:"];
+
+            Assert.IsTrue(Equal(image_shape_last_three_elements.dims, image_shape_last_three_elements2.dims), "3dims get fail.");
+
+            var image_shape_last_three_elements_4d = new Shape(new[] {
+                image_shape_4d.dims[image_shape_4d.dims.Length - 3],
+                image_shape_4d.dims[image_shape_4d.dims.Length - 2],
+                image_shape_4d.dims[image_shape_4d.dims.Length - 1]});
+
+            var image_shape_last_three_elements2_4d = image_shape_4d["-3:"];
+
+            Assert.IsTrue(Equal(image_shape_last_three_elements_4d.dims, image_shape_last_three_elements2_4d.dims), "4dims get fail.");
+        }
+
+    }
+}
\ No newline at end of file
From baf620a3e875e7cf6cfa82eb3c56392e2b7fab9a Mon Sep 17 00:00:00 2001
From: dogvane
Date: Sun, 8 Oct 2023 22:06:15 +0800
Subject: [PATCH 33/77] Fix the out-of-GPU-memory bug when training on GPU in
 Keras mode

The observed symptom: after raising the batch size, some models run out of
GPU memory partway through the first epoch, yet once a full epoch completes
they can train to the end. Under Python, the same models accept a much larger
batch size.

Tracing with a minimal training program showed the cause: the image data
loaded into GPU memory is not released after each step.

Since no API for explicitly releasing GPU memory turned up, GC.Collect() is
called directly, which makes the GPU memory get reclaimed.

The current fix is therefore to run GC.Collect() once in every step to
release GPU memory resources.
---
 src/TensorFlowNET.Core/Keras/Engine/IModel.cs | 23 +++++++++++++++++++
 .../Engine/Model.Evaluate.cs | 3 +++
 src/TensorFlowNET.Keras/Engine/Model.Fit.cs | 12 +++++-----
 .../Engine/Model.Predict.cs | 2 +-
 4 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/src/TensorFlowNET.Core/Keras/Engine/IModel.cs b/src/TensorFlowNET.Core/Keras/Engine/IModel.cs
index 1840f88b9..889c76d91 100644
--- a/src/TensorFlowNET.Core/Keras/Engine/IModel.cs
+++ b/src/TensorFlowNET.Core/Keras/Engine/IModel.cs
@@ -24,6 +24,7 @@ ICallback fit(NDArray x, NDArray y,
     List<ICallback> callbacks = null,
     float validation_split = 0f,
     ValidationDataPack validation_data = null,
+    int validation_step = 10,
     bool shuffle = true,
     Dictionary<int, float> class_weight = null,
     NDArray sample_weight = null,
@@ -47,6 +48,20 @@ ICallback fit(IEnumerable<NDArray> x, NDArray y,
     int workers = 1,
     bool use_multiprocessing = false);
 
+public ICallback fit(IDatasetV2 dataset,
+    int batch_size = -1,
+    int epochs = 1,
+    int verbose = 1,
+    List<ICallback> callbacks = null,
+    IDatasetV2 validation_data = null,
+    int validation_step = 10, // how many steps between two validation runs
+    bool shuffle = true,
+    Dictionary<int, float> class_weight = null,
+    int initial_epoch = 0,
+    int max_queue_size = 10,
+    int workers = 1,
+    bool use_multiprocessing = false);
+
 void save(string
filepath, bool overwrite = true, bool include_optimizer = true, @@ -85,6 +100,14 @@ Tensors predict(Tensors x, int workers = 1, bool use_multiprocessing = false); + public Tensors predict(IDatasetV2 dataset, + int batch_size = -1, + int verbose = 0, + int steps = -1, + int max_queue_size = 10, + int workers = 1, + bool use_multiprocessing = false); + void summary(int line_length = -1, float[] positions = null); IKerasConfig get_config(); diff --git a/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs b/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs index 94a2e6646..474d5e5a5 100644 --- a/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs +++ b/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs @@ -132,6 +132,7 @@ Dictionary evaluate(DataHandler data_handler, CallbackList callba var end_step = step + data_handler.StepIncrement; if (!is_val) callbacks.on_test_batch_end(end_step, logs); + GC.Collect(); } } callbacks.on_test_end(logs); @@ -167,7 +168,9 @@ Dictionary test_step_multi_inputs_function(DataHandler data_handl Dictionary test_step(DataHandler data_handler, Tensors x, Tensors y) { (x,y) = data_handler.DataAdapter.Expand1d(x, y); + var y_pred = Apply(x, training: false); + var loss = compiled_loss.Call(y, y_pred); compiled_metrics.update_state(y, y_pred); return metrics.Select(x => (x.Name, x.result())).ToDictionary(x => x.Item1, x => (float)x.Item2); diff --git a/src/TensorFlowNET.Keras/Engine/Model.Fit.cs b/src/TensorFlowNET.Keras/Engine/Model.Fit.cs index 689fc9fb8..d61211c71 100644 --- a/src/TensorFlowNET.Keras/Engine/Model.Fit.cs +++ b/src/TensorFlowNET.Keras/Engine/Model.Fit.cs @@ -41,6 +41,7 @@ public ICallback fit(NDArray x, NDArray y, List callbacks = null, float validation_split = 0f, ValidationDataPack validation_data = null, + int validation_step = 10, bool shuffle = true, Dictionary class_weight = null, NDArray sample_weight = null, @@ -147,7 +148,7 @@ public ICallback fit(IEnumerable x, NDArray y, } } - public History fit(IDatasetV2 dataset, + public ICallback fit(IDatasetV2 dataset, int batch_size = -1, int epochs = 1, int verbose = 1, @@ -156,7 +157,6 @@ public History fit(IDatasetV2 dataset, int validation_step = 10, bool shuffle = true, Dictionary class_weight = null, - NDArray sample_weight = null, int initial_epoch = 0, int max_queue_size = 10, int workers = 1, @@ -170,7 +170,7 @@ public History fit(IDatasetV2 dataset, InitialEpoch = initial_epoch, Epochs = epochs, Shuffle = shuffle, - SampleWeight = sample_weight, + ClassWeight = class_weight, MaxQueueSize = max_queue_size, Workers = workers, UseMultiprocessing = use_multiprocessing, @@ -218,6 +218,7 @@ History FitInternal(DataHandler data_handler, int epochs, int validation_step, i var end_step = step + data_handler.StepIncrement; End_step = end_step; callbacks.on_train_batch_end(end_step, logs); + GC.Collect(); } if (validation_data != null) @@ -233,11 +234,10 @@ History FitInternal(DataHandler data_handler, int epochs, int validation_step, i callbacks.on_train_batch_end(End_step, logs); } + GC.Collect(); callbacks.on_epoch_end(epoch, logs); - GC.Collect(); - GC.WaitForPendingFinalizers(); if (stop_training) { break; @@ -282,6 +282,7 @@ History FitInternal(DataHandler data_handler, int epochs, int verbose, List { { "outputs", batch_outputs } }); + GC.Collect(); } } From 93a242c08a330399328c8a1190f6b0d46308a226 Mon Sep 17 00:00:00 2001 From: Jucko13 Date: Tue, 10 Oct 2023 16:53:04 +0200 Subject: [PATCH 34/77] Implemented support for loading Concatenate layers model.load_model now supports loading of concatenate 
layers.

Python TensorFlow exports Concatenate layers in an extra nested array in the
manifest, so a check for that was added in generic_utils.cs.

Concatenate was missing build = true; this fix prevents the layer from being
built multiple times.

Concatenate has 2 or more input nodes, so a List<NodeConfig> was required
instead of just a NodeConfig in Functional.FromConfig.cs.

Added the missing axis JsonProperty attribute to MergeArgs (used by
Concatenate).
---
 .../Keras/ArgsDefinition/Merging/MergeArgs.cs | 6 ++--
 .../Engine/Functional.FromConfig.cs | 30 +++++++++++--------
 .../Layers/Merging/Concatenate.cs | 1 +
 .../Utils/generic_utils.cs | 13 +++++++-
 4 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/MergeArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/MergeArgs.cs
index 0140b3dd0..9bcf1908e 100644
--- a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/MergeArgs.cs
+++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Merging/MergeArgs.cs
@@ -1,13 +1,15 @@
-using System;
+using Newtonsoft.Json;
+using System;
 using System.Collections.Generic;
 using System.Text;
 
 namespace Tensorflow.Keras.ArgsDefinition
 {
     // TODO: complete the implementation
-    public class MergeArgs : LayerArgs
+    public class MergeArgs : AutoSerializeLayerArgs
     {
         public Tensors Inputs { get; set; }
+        [JsonProperty("axis")]
         public int Axis { get; set; }
     }
 }
diff --git a/src/TensorFlowNET.Keras/Engine/Functional.FromConfig.cs b/src/TensorFlowNET.Keras/Engine/Functional.FromConfig.cs
index 7b826af8e..375fc9106 100644
--- a/src/TensorFlowNET.Keras/Engine/Functional.FromConfig.cs
+++ b/src/TensorFlowNET.Keras/Engine/Functional.FromConfig.cs
@@ -30,7 +30,7 @@ public static (Tensors, Tensors, Dictionary<string, ILayer>) reconstruct_from_co
         created_layers = created_layers ?? new Dictionary<string, ILayer>();
         var node_index_map = new Dictionary<(string, int), int>();
         var node_count_by_layer = new Dictionary<ILayer, int>();
-        var unprocessed_nodes = new Dictionary<ILayer, NodeConfig>();
+        var unprocessed_nodes = new Dictionary<ILayer, List<NodeConfig>>();
         // First, we create all layers and enqueue nodes to be processed
         foreach (var layer_data in config.Layers)
             process_layer(created_layers, layer_data, unprocessed_nodes, node_count_by_layer);
@@ -79,7 +79,7 @@ public static (Tensors, Tensors, Dictionary<string, ILayer>) reconstruct_from_co
 
     static void process_layer(Dictionary<string, ILayer> created_layers,
         LayerConfig layer_data,
-        Dictionary<ILayer, NodeConfig> unprocessed_nodes,
+        Dictionary<ILayer, List<NodeConfig>> unprocessed_nodes,
         Dictionary<ILayer, int> node_count_by_layer)
     {
         ILayer layer = null;
@@ -92,32 +92,38 @@ static void process_layer(Dictionary<string, ILayer> created_layers,
             created_layers[layer_name] = layer;
         }
 
-        node_count_by_layer[layer] = _should_skip_first_node(layer) ? 1 : 0;
+        node_count_by_layer[layer] = layer_data.InboundNodes.Count - (_should_skip_first_node(layer) ?
1 : 0); var inbound_nodes_data = layer_data.InboundNodes; foreach (var node_data in inbound_nodes_data) { if (!unprocessed_nodes.ContainsKey(layer)) - unprocessed_nodes[layer] = node_data; + unprocessed_nodes[layer] = new List() { node_data }; else - unprocessed_nodes.Add(layer, node_data); + unprocessed_nodes[layer].Add(node_data); } } static void process_node(ILayer layer, - NodeConfig node_data, + List nodes_data, Dictionary created_layers, Dictionary node_count_by_layer, Dictionary<(string, int), int> node_index_map) { + var input_tensors = new List(); - var inbound_layer_name = node_data.Name; - var inbound_node_index = node_data.NodeIndex; - var inbound_tensor_index = node_data.TensorIndex; - var inbound_layer = created_layers[inbound_layer_name]; - var inbound_node = inbound_layer.InboundNodes[inbound_node_index]; - input_tensors.Add(inbound_node.Outputs[inbound_node_index]); + for (int i = 0; i < nodes_data.Count; i++) + { + var node_data = nodes_data[i]; + var inbound_layer_name = node_data.Name; + var inbound_node_index = node_data.NodeIndex; + var inbound_tensor_index = node_data.TensorIndex; + + var inbound_layer = created_layers[inbound_layer_name]; + var inbound_node = inbound_layer.InboundNodes[inbound_node_index]; + input_tensors.Add(inbound_node.Outputs[inbound_node_index]); + } var output_tensors = layer.Apply(input_tensors); diff --git a/src/TensorFlowNET.Keras/Layers/Merging/Concatenate.cs b/src/TensorFlowNET.Keras/Layers/Merging/Concatenate.cs index a2a8286ba..fa82426ce 100644 --- a/src/TensorFlowNET.Keras/Layers/Merging/Concatenate.cs +++ b/src/TensorFlowNET.Keras/Layers/Merging/Concatenate.cs @@ -39,6 +39,7 @@ public override void build(KerasShapesWrapper input_shape) shape_set.Add(shape); }*/ _buildInputShape = input_shape; + built = true; } protected override Tensors _merge_function(Tensors inputs) diff --git a/src/TensorFlowNET.Keras/Utils/generic_utils.cs b/src/TensorFlowNET.Keras/Utils/generic_utils.cs index 5402f4995..20937e2e5 100644 --- a/src/TensorFlowNET.Keras/Utils/generic_utils.cs +++ b/src/TensorFlowNET.Keras/Utils/generic_utils.cs @@ -112,12 +112,23 @@ public static FunctionalConfig deserialize_model_config(JToken json) foreach (var token in layersToken) { var args = deserialize_layer_args(token["class_name"].ToObject(), token["config"]); + + List nodeConfig = null; //python tensorflow sometimes exports inbound nodes in an extra nested array + if (token["inbound_nodes"].Count() > 0 && token["inbound_nodes"][0].Count() > 0 && token["inbound_nodes"][0][0].Count() > 0) + { + nodeConfig = token["inbound_nodes"].ToObject>>().FirstOrDefault() ?? new List(); + } + else + { + nodeConfig = token["inbound_nodes"].ToObject>(); + } + config.Layers.Add(new LayerConfig() { Config = args, Name = token["name"].ToObject(), ClassName = token["class_name"].ToObject(), - InboundNodes = token["inbound_nodes"].ToObject>() + InboundNodes = nodeConfig, }); } config.InputLayers = json["input_layers"].ToObject>(); From 9f0ffa4bc83b181ddd525cf1b90d77a32e073fa3 Mon Sep 17 00:00:00 2001 From: Jucko13 Date: Tue, 10 Oct 2023 17:02:22 +0200 Subject: [PATCH 35/77] Implemented unittests for Concatenate layers and calls The loading and saving of a simple model with a Concatenate layer is tested to check if the model is the same after reloading. Implemented missing axis parameter for np.stack (added some handy tuple calls too like the np.concatenate example). 
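(A sketch of how the new np.stack overloads below are expected to behave. The values are made up for illustration and assume two 1-D arrays; this is not part of the commit itself.)

    var a = np.array(new[] { 1, 2 });
    var b = np.array(new[] { 3, 4 });
    var s0 = np.stack(new[] { a, b }, axis: 0); // shape (2, 2): a and b as rows
    var s1 = np.stack((a, b), axis: 1);         // tuple overload; shape (2, 2): a and b as columns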
--- .../NumPy/Numpy.Manipulation.cs | 9 ++++ .../Layers/Layers.Merging.Test.cs | 15 ++++--- .../Model/ModelLoadTest.cs | 43 +++++++++++++++++++ 3 files changed, 62 insertions(+), 5 deletions(-) diff --git a/src/TensorFlowNET.Core/NumPy/Numpy.Manipulation.cs b/src/TensorFlowNET.Core/NumPy/Numpy.Manipulation.cs index 940856056..5e2574170 100644 --- a/src/TensorFlowNET.Core/NumPy/Numpy.Manipulation.cs +++ b/src/TensorFlowNET.Core/NumPy/Numpy.Manipulation.cs @@ -30,6 +30,15 @@ public static NDArray concatenate((NDArray, NDArray) tuple, int axis = 0) [AutoNumPy] public static NDArray stack(params NDArray[] arrays) => new NDArray(array_ops.stack(arrays)); + [AutoNumPy] + public static NDArray stack(NDArray[] arrays, int axis = 0) => new NDArray(array_ops.stack(arrays, axis)); + + [AutoNumPy] + public static NDArray stack((NDArray, NDArray) tuple, int axis = 0) => new NDArray(array_ops.stack(new[] { tuple.Item1, tuple.Item2 }, axis)); + + [AutoNumPy] + public static NDArray stack((NDArray, NDArray, NDArray) tuple, int axis = 0) => new NDArray(array_ops.stack(new[] { tuple.Item1, tuple.Item2, tuple.Item3 }, axis)); + [AutoNumPy] public static NDArray moveaxis(NDArray array, Axis source, Axis destination) => new NDArray(array_ops.moveaxis(array, source, destination)); } diff --git a/test/TensorFlowNET.Keras.UnitTest/Layers/Layers.Merging.Test.cs b/test/TensorFlowNET.Keras.UnitTest/Layers/Layers.Merging.Test.cs index 36e44e482..9bc2fa767 100644 --- a/test/TensorFlowNET.Keras.UnitTest/Layers/Layers.Merging.Test.cs +++ b/test/TensorFlowNET.Keras.UnitTest/Layers/Layers.Merging.Test.cs @@ -1,4 +1,5 @@ using Microsoft.VisualStudio.TestTools.UnitTesting; +using System.Collections.Generic; using Tensorflow.NumPy; using static Tensorflow.KerasApi; @@ -8,12 +9,16 @@ namespace Tensorflow.Keras.UnitTest.Layers public class LayersMergingTest : EagerModeTestBase { [TestMethod] - public void Concatenate() + [DataRow(1, 4, 1, 5)] + [DataRow(2, 2, 2, 5)] + [DataRow(3, 2, 1, 10)] + public void Concatenate(int axis, int shapeA, int shapeB, int shapeC) { - var x = np.arange(20).reshape((2, 2, 5)); - var y = np.arange(20, 30).reshape((2, 1, 5)); - var z = keras.layers.Concatenate(axis: 1).Apply(new Tensors(x, y)); - Assert.AreEqual((2, 3, 5), z.shape); + var x = np.arange(10).reshape((1, 2, 1, 5)); + var y = np.arange(10, 20).reshape((1, 2, 1, 5)); + var z = keras.layers.Concatenate(axis: axis).Apply(new Tensors(x, y)); + Assert.AreEqual((1, shapeA, shapeB, shapeC), z.shape); } + } } diff --git a/test/TensorFlowNET.Keras.UnitTest/Model/ModelLoadTest.cs b/test/TensorFlowNET.Keras.UnitTest/Model/ModelLoadTest.cs index cb570fc0c..53a67cbfa 100644 --- a/test/TensorFlowNET.Keras.UnitTest/Model/ModelLoadTest.cs +++ b/test/TensorFlowNET.Keras.UnitTest/Model/ModelLoadTest.cs @@ -1,10 +1,13 @@ using Microsoft.VisualStudio.TestPlatform.Utilities; using Microsoft.VisualStudio.TestTools.UnitTesting; +using Newtonsoft.Json.Linq; using System.Linq; +using System.Xml.Linq; using Tensorflow.Keras.Engine; using Tensorflow.Keras.Optimizers; using Tensorflow.Keras.UnitTest.Helpers; using Tensorflow.NumPy; +using static HDF.PInvoke.H5Z; using static Tensorflow.Binding; using static Tensorflow.KerasApi; @@ -124,4 +127,44 @@ public void TestModelBeforeTF2_5() var model = tf.saved_model.load(@"D:\development\temp\saved_model") as Tensorflow.Keras.Engine.Model; model.summary(); } + + + + [TestMethod] + public void CreateConcatenateModelSaveAndLoad() + { + // a small demo model that is just here to see if the axis value for the concatenate 
method is saved and loaded. + var input_layer = tf.keras.layers.Input((8, 8, 5)); + + var conv1 = tf.keras.layers.Conv2D(2, kernel_size: 3, activation: "relu", padding: "same"/*, data_format: "_conv_1"*/).Apply(input_layer); + conv1.Name = "conv1"; + + var conv2 = tf.keras.layers.Conv2D(2, kernel_size: 3, activation: "relu", padding: "same"/*, data_format: "_conv_2"*/).Apply(input_layer); + conv2.Name = "conv2"; + + var concat1 = tf.keras.layers.Concatenate(axis: 3).Apply((conv1, conv2)); + concat1.Name = "concat1"; + + var model = tf.keras.Model(input_layer, concat1); + model.compile(tf.keras.optimizers.Adam(), tf.keras.losses.CategoricalCrossentropy()); + + model.save(@"Assets/concat_axis3_model"); + + + var tensorInput = np.arange(320).reshape((1, 8, 8, 5)).astype(TF_DataType.TF_FLOAT); + + var tensors1 = model.predict(tensorInput); + + Assert.AreEqual((1, 8, 8, 4), tensors1.shape); + + model = null; + keras.backend.clear_session(); + + var model2 = tf.keras.models.load_model(@"Assets/concat_axis3_model"); + + var tensors2 = model2.predict(tensorInput); + + Assert.AreEqual(tensors1.shape, tensors2.shape); + } + } From ec4f372a29b5cbc5fe6c0d6b8414ddb48c22e548 Mon Sep 17 00:00:00 2001 From: dogvane Date: Mon, 16 Oct 2023 11:22:58 +0800 Subject: [PATCH 36/77] add relu6 --- src/TensorFlowNET.Core/APIs/tf.nn.cs | 5 ++++ .../Keras/Activations/Activations.cs | 1 + .../Keras/Layers/ILayersApi.cs | 3 +++ src/TensorFlowNET.Keras/Activations.cs | 7 ++++++ .../Layers/Activation/ReLu6.cs | 25 +++++++++++++++++++ src/TensorFlowNET.Keras/Layers/LayersApi.cs | 9 +++++++ 6 files changed, 50 insertions(+) create mode 100644 src/TensorFlowNET.Keras/Layers/Activation/ReLu6.cs diff --git a/src/TensorFlowNET.Core/APIs/tf.nn.cs b/src/TensorFlowNET.Core/APIs/tf.nn.cs index 397c68c7c..112c48628 100644 --- a/src/TensorFlowNET.Core/APIs/tf.nn.cs +++ b/src/TensorFlowNET.Core/APIs/tf.nn.cs @@ -101,6 +101,8 @@ public Tensor embedding_lookup(Tensor @params, name: name); public IActivation relu() => new relu(); + + public IActivation swish() => new swish(); public IActivation tanh() => new tanh(); @@ -111,6 +113,9 @@ public Tensor tanh(Tensor x, string name = null) public Tensor relu(Tensor features, string name = null) => gen_nn_ops.relu(features, name); + public Tensor relu6(Tensor features, string name = null) + => gen_nn_ops.relu6(features, name); + public Tensor[] fused_batch_norm(Tensor x, Tensor scale, Tensor offset, diff --git a/src/TensorFlowNET.Core/Keras/Activations/Activations.cs b/src/TensorFlowNET.Core/Keras/Activations/Activations.cs index f0d59ed62..37264104a 100644 --- a/src/TensorFlowNET.Core/Keras/Activations/Activations.cs +++ b/src/TensorFlowNET.Core/Keras/Activations/Activations.cs @@ -32,6 +32,7 @@ public interface IActivationsApi Activation Linear { get; } Activation Relu { get; } + Activation Relu6 { get; } Activation Sigmoid { get; } diff --git a/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs b/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs index 3fd98e7a8..57273eb08 100644 --- a/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs +++ b/src/TensorFlowNET.Core/Keras/Layers/ILayersApi.cs @@ -180,6 +180,9 @@ public ILayer LayerNormalization(Axis? axis, public ILayer Normalization(Shape? input_shape = null, int? axis = -1, float? mean = null, float? 
variance = null, bool invert = false);
 
 public ILayer LeakyReLU(float alpha = 0.3f);
 
+public ILayer ReLU6();
+
+
 public IRnnCell LSTMCell(int uints,
     string activation = "tanh",
     string recurrent_activation = "sigmoid",
diff --git a/src/TensorFlowNET.Keras/Activations.cs b/src/TensorFlowNET.Keras/Activations.cs
index ce5b4eb13..d3801902f 100644
--- a/src/TensorFlowNET.Keras/Activations.cs
+++ b/src/TensorFlowNET.Keras/Activations.cs
@@ -20,6 +20,11 @@ public class Activations: IActivationsApi
             Name = "relu",
             ActivationFunction = (features, name) => tf.Context.ExecuteOp("Relu", name, new ExecuteOpArgs(features))
         };
+        private static Activation _relu6 = new Activation()
+        {
+            Name = "relu6",
+            ActivationFunction = (features, name) => tf.Context.ExecuteOp("Relu6", name, new ExecuteOpArgs(features))
+        };
         private static Activation _sigmoid = new Activation()
         {
             Name = "sigmoid",
@@ -55,6 +60,7 @@ static Activations()
             _nameActivationMap = new Dictionary<string, Activation>();
 
             RegisterActivation(_relu);
+            RegisterActivation(_relu6);
             RegisterActivation(_linear);
             RegisterActivation(_sigmoid);
             RegisterActivation(_softmax);
@@ -65,6 +71,7 @@ static Activations()
         public Activation Linear => _linear;
 
         public Activation Relu => _relu;
+        public Activation Relu6 => _relu6;
 
         public Activation Sigmoid => _sigmoid;
 
diff --git a/src/TensorFlowNET.Keras/Layers/Activation/ReLu6.cs b/src/TensorFlowNET.Keras/Layers/Activation/ReLu6.cs
new file mode 100644
index 000000000..5af3f7677
--- /dev/null
+++ b/src/TensorFlowNET.Keras/Layers/Activation/ReLu6.cs
@@ -0,0 +1,25 @@
+using System;
+using System.Collections.Generic;
+using System.Text;
+using Tensorflow.Keras.ArgsDefinition;
+using Tensorflow.Keras.Engine;
+using Tensorflow.Common.Types;
+using static Tensorflow.Binding;
+
+namespace Tensorflow.Keras.Layers
+{
+    /// <summary>
+    /// Rectified Linear Unit 6 layer: computes min(max(x, 0), 6).
+    /// </summary>
+    public class ReLu6 : Layer
+    {
+        public ReLu6() : base(new LayerArgs { })
+        {
+        }
+
+        protected override Tensors Call(Tensors inputs, Tensors state = null, bool? training = null, IOptionalArgs? optional_args = null)
+        {
+            return tf.nn.relu6(inputs);
+        }
+    }
+}
diff --git a/src/TensorFlowNET.Keras/Layers/LayersApi.cs b/src/TensorFlowNET.Keras/Layers/LayersApi.cs
index bcc19dc22..e2adb23d0 100644
--- a/src/TensorFlowNET.Keras/Layers/LayersApi.cs
+++ b/src/TensorFlowNET.Keras/Layers/LayersApi.cs
@@ -735,6 +735,15 @@ public ILayer LeakyReLU(float alpha = 0.3f)
             });
 
+        /// <summary>
+        /// Rectified Linear Unit 6: a ReLU capped at 6, i.e. min(max(features, 0), 6).
+        /// </summary>
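+        /// <remarks>
+        /// Usage sketch (an illustrative addition, not from the original patch):
+        /// keras.layers.ReLU6().Apply(x) clamps x into [0, 6], so inputs of
+        /// (-1, 3, 8) come out as (0, 3, 6).
+        /// </remarks>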
+ /// + public ILayer ReLU6() + => new ReLu6(); + + public IRnnCell SimpleRNNCell( int units, string activation = "tanh", From eb4ff88d39160e6046e43fe5e7453ea3e1abeac4 Mon Sep 17 00:00:00 2001 From: SMURF Date: Wed, 18 Oct 2023 23:34:15 +0100 Subject: [PATCH 37/77] fix: Saving a loaded model --- src/TensorFlowNET.Keras/Engine/Layer.Serialize.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/TensorFlowNET.Keras/Engine/Layer.Serialize.cs b/src/TensorFlowNET.Keras/Engine/Layer.Serialize.cs index ed5c2de0a..49811417e 100644 --- a/src/TensorFlowNET.Keras/Engine/Layer.Serialize.cs +++ b/src/TensorFlowNET.Keras/Engine/Layer.Serialize.cs @@ -27,6 +27,6 @@ public override IDictionary _trackable_children(SaveType save children = new Dictionary(); } - return children.Concat(base._trackable_children(save_type, cache)).ToDictionary(x => x.Key, x => x.Value); + return children.Concat(base._trackable_children(save_type, cache)).GroupBy(x => x.Key).Select(g => g.First()).ToDictionary(x => x.Key, x => x.Value); } } \ No newline at end of file From a73694ab2db42b2a4ea560c6bbb36ed9175fc5fb Mon Sep 17 00:00:00 2001 From: Wanglongzhi2001 <583087864@qq.com> Date: Fri, 20 Oct 2023 11:24:27 +0800 Subject: [PATCH 38/77] fix: add the implementation of the tile's grad --- .../Gradients/array_grad.cs | 24 +++++++++++++++++++ .../Operations/array_ops.cs | 2 +- .../GradientTest/GradientEagerTest.cs | 14 +++++++++++ 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/src/TensorFlowNET.Core/Gradients/array_grad.cs b/src/TensorFlowNET.Core/Gradients/array_grad.cs index 4b7027992..016e4f029 100644 --- a/src/TensorFlowNET.Core/Gradients/array_grad.cs +++ b/src/TensorFlowNET.Core/Gradients/array_grad.cs @@ -381,5 +381,29 @@ public static Tensor[] _ReverseV2Grad(Operation op, Tensor[] grads) var axis = op.inputs[1]; return new Tensor[] { array_ops.reverse(grad, axis), null }; } + + [RegisterGradient("Tile")] + public static Tensor[] _TileGrad(Operation op, Tensor[] grads) + { + var grad = grads[0]; + var input_shape = array_ops.shape(op.inputs[0], out_type: op.inputs[1].dtype); + var split_shape = array_ops.reshape(array_ops.transpose(array_ops.stack(new Tensor[] { op.inputs[1], input_shape })), new Shape(-1)); + var axes = math_ops.range(0, array_ops.size(split_shape), 2); + + //# Sum reduces grad along the first dimension for IndexedSlices + //if isinstance(grad, indexed_slices_lib.IndexedSlices): + //input_shape_0 = math_ops.cast(input_shape[0], grad.indices.dtype) + //grad = math_ops.unsorted_segment_sum( + // grad.values, math_ops.mod(grad.indices, input_shape_0), input_shape_0) + //split_shape = array_ops.concat([[1], split_shape[1:]], axis = 0) + + var input_grad = math_ops.reduce_sum(array_ops.reshape(grad, split_shape), axes); + if (!tf.Context.executing_eagerly()) + { + input_grad.set_shape(op.inputs[0].GetShape()); + } + return new Tensor[] { input_grad, null }; + + } } } diff --git a/src/TensorFlowNET.Core/Operations/array_ops.cs b/src/TensorFlowNET.Core/Operations/array_ops.cs index fdc53cd7e..abf44c643 100644 --- a/src/TensorFlowNET.Core/Operations/array_ops.cs +++ b/src/TensorFlowNET.Core/Operations/array_ops.cs @@ -990,7 +990,7 @@ public static Tensor gather(ResourceVariable @params, Tensor indices, string nam return @params.sparse_read(indices, name); } - public static Tensor transpose(T1 a, Axis perm, string name = "transpose", bool conjugate = false) + public static Tensor transpose(T1 a, Axis perm = null, string name = "transpose", bool conjugate = false) { return 
tf_with(ops.name_scope(name, "transpose", new { a }), scope => { diff --git a/test/TensorFlowNET.UnitTest/GradientTest/GradientEagerTest.cs b/test/TensorFlowNET.UnitTest/GradientTest/GradientEagerTest.cs index e41e1d617..ed7599045 100644 --- a/test/TensorFlowNET.UnitTest/GradientTest/GradientEagerTest.cs +++ b/test/TensorFlowNET.UnitTest/GradientTest/GradientEagerTest.cs @@ -173,5 +173,19 @@ public void ConditionalMultiply() var result = grad(x, 4); Assert.AreEqual((float)result, 4.0f); } + + [TestMethod] + public void Tile() + { + var a = tf.constant(new int[] { 1 }, TF_DataType.TF_FLOAT); + var b = tf.constant(new int[] { 2 }); + using (var tape = tf.GradientTape()) + { + tape.watch(a); + var y = tf.tile(a, b); + var grad = tape.gradient(y, a); + Assert.AreEqual((float)grad.numpy(), 2.0f); + } + } } } From 3fcc4d8d1540c7c01ce4ca05ea883874abd4e5e5 Mon Sep 17 00:00:00 2001 From: Wanglongzhi2001 <583087864@qq.com> Date: Fri, 20 Oct 2023 11:30:33 +0800 Subject: [PATCH 39/77] fix: add the GRU, LSTM, SimpleRNN's OptionalArgs --- .../Keras/ArgsDefinition/Rnn/GRUOptionalArgs.cs | 4 +--- .../Keras/ArgsDefinition/Rnn/LSTMOptionalArgs.cs | 11 +++++++++++ .../Keras/ArgsDefinition/Rnn/SimpleRNNOptionalArgs.cs | 11 +++++++++++ 3 files changed, 23 insertions(+), 3 deletions(-) create mode 100644 src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/LSTMOptionalArgs.cs create mode 100644 src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/SimpleRNNOptionalArgs.cs diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUOptionalArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUOptionalArgs.cs index d441dc828..1d215576f 100644 --- a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUOptionalArgs.cs +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/GRUOptionalArgs.cs @@ -4,10 +4,8 @@ namespace Tensorflow.Keras.ArgsDefinition { - public class GRUOptionalArgs + public class GRUOptionalArgs : RnnOptionalArgs { public string Identifier => "GRU"; - - public Tensor Mask { get; set; } = null; } } diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/LSTMOptionalArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/LSTMOptionalArgs.cs new file mode 100644 index 000000000..2829927c3 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/LSTMOptionalArgs.cs @@ -0,0 +1,11 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition.Rnn +{ + public class LSTMOptionalArgs : RnnOptionalArgs + { + public string Identifier => "LSTM"; + } +} diff --git a/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/SimpleRNNOptionalArgs.cs b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/SimpleRNNOptionalArgs.cs new file mode 100644 index 000000000..a8b8caf06 --- /dev/null +++ b/src/TensorFlowNET.Core/Keras/ArgsDefinition/Rnn/SimpleRNNOptionalArgs.cs @@ -0,0 +1,11 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Tensorflow.Keras.ArgsDefinition.Rnn +{ + public class SimpleRNNOptionalArgs : RnnOptionalArgs + { + public string Identifier => "SimpleRNN"; + } +} From d0ec6591a0cc0ea3325a7fc723435b23eabc757b Mon Sep 17 00:00:00 2001 From: Wanglongzhi2001 <583087864@qq.com> Date: Fri, 20 Oct 2023 15:40:35 +0800 Subject: [PATCH 40/77] fix: add the implementation of GatherND's grad --- src/TensorFlowNET.Core/APIs/tf.array.cs | 10 ++++++++++ .../Gradients/array_grad.cs | 19 +++++++++++++++++++ .../Operations/array_ops.cs | 2 +- .../GradientTest/GradientEagerTest.cs | 17 ++++++++++++++++- 4 files changed, 46 
insertions(+), 2 deletions(-) diff --git a/src/TensorFlowNET.Core/APIs/tf.array.cs b/src/TensorFlowNET.Core/APIs/tf.array.cs index 4d9c3da58..b529cd319 100644 --- a/src/TensorFlowNET.Core/APIs/tf.array.cs +++ b/src/TensorFlowNET.Core/APIs/tf.array.cs @@ -140,6 +140,16 @@ public Tensor identity(Tensor input, string name = null) public Tensor gather(Tensor @params, Tensor indices, string name = null, int axis = 0) => array_ops.gather(@params, indices, name: name, axis: ops.convert_to_tensor(axis)); + /// + /// Gather slices from `params` into a Tensor with shape specified by `indices`. + /// + /// + /// + /// + /// + public Tensor gather_nd(Tensor @params, Tensor indices, string name = null) + => gen_array_ops.gather_nd(@params, indices, name: name); + /// /// Return the elements, either from `x` or `y`, depending on the `condition`. /// diff --git a/src/TensorFlowNET.Core/Gradients/array_grad.cs b/src/TensorFlowNET.Core/Gradients/array_grad.cs index 016e4f029..a4da60eed 100644 --- a/src/TensorFlowNET.Core/Gradients/array_grad.cs +++ b/src/TensorFlowNET.Core/Gradients/array_grad.cs @@ -403,7 +403,26 @@ public static Tensor[] _TileGrad(Operation op, Tensor[] grads) input_grad.set_shape(op.inputs[0].GetShape()); } return new Tensor[] { input_grad, null }; + } + [RegisterGradient("GatherNd")] + public static Tensor[] _GatherNdGrad(Operation op, Tensor[] grads) + { + var @ref = op.inputs[0]; + var indices = op.inputs[1]; + var grad = grads[0]; + var ref_shape = array_ops.shape(@ref, out_type: indices.dtype); + Tensor ref_grad = null; + if (indices.shape.ndim == 2 && indices.shape.dims[indices.shape.Length - 1] == 1) + { + ref_grad = (Tensor)new IndexedSlices(grad, array_ops.squeeze(indices, axis: -1), ref_shape); + } + else + { + ref_grad = gen_array_ops.scatter_nd(indices, grad, ref_shape); + } + return new Tensor[] { ref_grad, null }; } + } } diff --git a/src/TensorFlowNET.Core/Operations/array_ops.cs b/src/TensorFlowNET.Core/Operations/array_ops.cs index abf44c643..57af3b835 100644 --- a/src/TensorFlowNET.Core/Operations/array_ops.cs +++ b/src/TensorFlowNET.Core/Operations/array_ops.cs @@ -829,7 +829,7 @@ public static Tensor strided_slice_grad(Tensor shape, Tensor begin, Tensor end, /// A `Tensor`. Has the same type as `input`. /// Contains the same data as `input`, but has one or more dimensions of /// size 1 removed. 
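/// Illustrative example (added for clarity, not in the original source):
/// for t of shape (1, 2, 1), array_ops.squeeze(t) has shape (2), while
/// array_ops.squeeze(t, axis: new Axis(0)) has shape (2, 1).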
- public static Tensor squeeze(Tensor input, int[] axis = null, string name = null) + public static Tensor squeeze(Tensor input, Axis axis = null, string name = null) => gen_array_ops.squeeze(input, axis, name); public static Tensor identity(Tensor input, string name = null) diff --git a/test/TensorFlowNET.UnitTest/GradientTest/GradientEagerTest.cs b/test/TensorFlowNET.UnitTest/GradientTest/GradientEagerTest.cs index ed7599045..1cfceb3e3 100644 --- a/test/TensorFlowNET.UnitTest/GradientTest/GradientEagerTest.cs +++ b/test/TensorFlowNET.UnitTest/GradientTest/GradientEagerTest.cs @@ -62,7 +62,7 @@ public void SquaredDifference_1D() // Calcute the gradient of (x1-x2)^2 // by Automatic Differentiation in Eager mode // Expected is 2*(abs(x1-x2)) - Tensor x1 = new NDArray( new float[] { 1, 3, 5, 21, 19, 17 }); + Tensor x1 = new NDArray(new float[] { 1, 3, 5, 21, 19, 17 }); Tensor x2 = new NDArray(new float[] { 29, 27, 23, 7, 11, 13 }); float[] expected = new float[] { @@ -187,5 +187,20 @@ public void Tile() Assert.AreEqual((float)grad.numpy(), 2.0f); } } + + [TestMethod] + public void GatherNdTest() + { + var x = tf.constant(new float[,] { { 1.0f, 2.0f, 3.0f }, { 1.0f, 2.0f, 3.0f }, { 1.0f, 2.0f, 3.0f } }, dtype: TF_DataType.TF_FLOAT); + var indices = tf.constant(new int[,] { { 0, 1 }, { 1, 1 }, { 2, 1 } }, dtype: TF_DataType.TF_INT32); + using (var tape = tf.GradientTape()) + { + tape.watch(x); + var res = tf.gather_nd(x, indices); + var grad = tape.gradient(res, x); + var expected = np.array(new float[,] { { 0f, 1f, 0f }, { 0f, 1f, 0f }, { 0f, 1f, 0f } }); + Assert.IsTrue(Enumerable.SequenceEqual(grad.ToArray(), expected.ToArray())); + } + } } } From 4e42d7f3a8ee574caf9c3896bb6438e88cbab211 Mon Sep 17 00:00:00 2001 From: Wanglongzhi2001 <583087864@qq.com> Date: Sat, 4 Nov 2023 10:18:50 +0800 Subject: [PATCH 41/77] fix: fix the bug of boolean_mask --- src/TensorFlowNET.Core/Operations/NnOps/rnn.cs | 4 ++-- src/TensorFlowNET.Core/Operations/array_ops.cs | 13 +++++++++---- src/TensorFlowNET.Core/Operations/nn_ops.cs | 2 +- .../Basics/TensorTest.cs | 7 ++++--- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/TensorFlowNET.Core/Operations/NnOps/rnn.cs b/src/TensorFlowNET.Core/Operations/NnOps/rnn.cs index 6b9f073c1..55f139207 100644 --- a/src/TensorFlowNET.Core/Operations/NnOps/rnn.cs +++ b/src/TensorFlowNET.Core/Operations/NnOps/rnn.cs @@ -428,9 +428,9 @@ public static Tensor _transpose_batch_time(Tensor x) return x; var x_rank = array_ops.rank(x); - var con1 = new object[] + var con1 = new Tensor[] { - new []{1, 0 }, + new Tensor(new int[]{0, 2}), math_ops.range(2, x_rank) }; var x_t = array_ops.transpose(x, array_ops.concat(con1, 0)); diff --git a/src/TensorFlowNET.Core/Operations/array_ops.cs b/src/TensorFlowNET.Core/Operations/array_ops.cs index 57af3b835..1b424006d 100644 --- a/src/TensorFlowNET.Core/Operations/array_ops.cs +++ b/src/TensorFlowNET.Core/Operations/array_ops.cs @@ -166,6 +166,11 @@ public static Tensor boolean_mask(T1 tensor, T2 mask, string name = "boo throw new ValueError("mask cannot be scalar."); var leading_size = gen_math_ops.prod(shape(tensor_tensor)[$"{axis}:{axis + ndims_mask}"], ops.convert_to_tensor(new[] { 0 })); + if (leading_size.rank == 0) + { + leading_size = expand_dims(leading_size, 0); + } + var shape1 = concat(new[] { shape(tensor_tensor)[$":{axis}"], @@ -185,7 +190,7 @@ public static Tensor boolean_mask(T1 tensor, T2 mask, string name = "boo private static Tensor _apply_mask_1d(Tensor reshaped_tensor, Tensor mask, int axis = 0) { - var 
indices = squeeze(where(mask), axis: new[] { 1 }); + var indices = squeeze(where_v2(mask), axis: new[] { 1 }); return gather(reshaped_tensor, indices, axis: ops.convert_to_tensor(axis)); } @@ -940,12 +945,12 @@ public static Tensor broadcast_static_shape(Tensor shape_x, Tensor shape_y) /// public static Tensor concat(Tensor[] values, Tensor axis, string name = "concat") { - return tf.Context.ExecuteOp("ConcatV2", name, new ExecuteOpArgs(values, axis)); + return gen_array_ops.concat_v2(values, axis, name: name); } - public static Tensor concat(object[] values, int axis, string name = "concat") + public static Tensor concat(Tensor[] values, Axis axis, string name = "concat") { - return tf.Context.ExecuteOp("ConcatV2", name, new ExecuteOpArgs(values, axis)); + return gen_array_ops.concat_v2(values, axis, name: name); } /// diff --git a/src/TensorFlowNET.Core/Operations/nn_ops.cs b/src/TensorFlowNET.Core/Operations/nn_ops.cs index 00d7d316b..394a591ab 100644 --- a/src/TensorFlowNET.Core/Operations/nn_ops.cs +++ b/src/TensorFlowNET.Core/Operations/nn_ops.cs @@ -287,7 +287,7 @@ private static Tensor _flatten_outer_dims(Tensor logits) new[] { math_ops.subtract(rank, 1) }, new[] { constant_op.constant(1) }); - var ops = array_ops.concat(new[] { new[] { -1 }, (object)last_dim_size }, 0); + var ops = array_ops.concat(new Tensor[] { new Tensor(new int[] {1}), last_dim_size }, 0); var output = array_ops.reshape(logits, ops); // Set output shape if known. diff --git a/test/TensorFlowNET.Graph.UnitTest/Basics/TensorTest.cs b/test/TensorFlowNET.Graph.UnitTest/Basics/TensorTest.cs index 90de78743..8093c1f23 100644 --- a/test/TensorFlowNET.Graph.UnitTest/Basics/TensorTest.cs +++ b/test/TensorFlowNET.Graph.UnitTest/Basics/TensorTest.cs @@ -3,6 +3,7 @@ using System; using System.Linq; using static Tensorflow.Binding; +using Tensorflow; namespace TensorFlowNET.UnitTest.Basics { @@ -60,14 +61,14 @@ public void batch_to_space_nd() Assert.IsTrue(Enumerable.SequenceEqual(new int[] { 15, 21, 16, 22, 17, 23 }, result[0, 3].ToArray())); } - [TestMethod, Ignore] + [TestMethod] public void boolean_mask() { + if (!tf.executing_eagerly()) + tf.enable_eager_execution(); var tensor = new[] { 0, 1, 2, 3 }; var mask = np.array(new[] { true, false, true, false }); var masked = tf.boolean_mask(tensor, mask); - var sess = tf.Session(); - var result = sess.run(masked); Assert.IsTrue(Enumerable.SequenceEqual(new int[] { 0, 2 }, masked.ToArray())); } } From f721baee711cc79a5270e72d73acb475ed4abaf0 Mon Sep 17 00:00:00 2001 From: Wanglongzhi2001 <583087864@qq.com> Date: Sun, 5 Nov 2023 14:05:41 +0800 Subject: [PATCH 42/77] test: add the concat_v2 test --- .../TensorFlow.Kernel.UnitTest.csproj | 24 +++++++ .../array_ops/concat_op_test.cs | 65 +++++++++++++++++++ TensorFlow.NET.sln | 21 ++++++ 3 files changed, 110 insertions(+) create mode 100644 TensorFlow.Kernel.UnitTest/TensorFlow.Kernel.UnitTest.csproj create mode 100644 TensorFlow.Kernel.UnitTest/array_ops/concat_op_test.cs diff --git a/TensorFlow.Kernel.UnitTest/TensorFlow.Kernel.UnitTest.csproj b/TensorFlow.Kernel.UnitTest/TensorFlow.Kernel.UnitTest.csproj new file mode 100644 index 000000000..a52a4cda6 --- /dev/null +++ b/TensorFlow.Kernel.UnitTest/TensorFlow.Kernel.UnitTest.csproj @@ -0,0 +1,24 @@ + + + + net6.0 + enable + enable + + false + true + + + + + + + + + + + + + + + diff --git a/TensorFlow.Kernel.UnitTest/array_ops/concat_op_test.cs b/TensorFlow.Kernel.UnitTest/array_ops/concat_op_test.cs new file mode 100644 index 000000000..cfa8f0fbf --- /dev/null +++ 
b/TensorFlow.Kernel.UnitTest/array_ops/concat_op_test.cs @@ -0,0 +1,65 @@ +using Microsoft.VisualStudio.TestTools.UnitTesting; +using Tensorflow; +using Tensorflow.NumPy; +using TensorFlow; +using static Tensorflow.Binding; +using static Tensorflow.KerasApi; + +namespace TensorFlow.Kernel.UnitTest +{ + [TestClass] + public class concat_op_test + { + [TestMethod] + public void testConcatEmpty() + { + var t1 = tf.constant(new int[] { }); + var t2 = tf.constant(new int[] { }); + var c = array_ops.concat(new[] { t1, t2 }, 0); + var expected = np.array(new int[] { }); + Assert.IsTrue(Enumerable.SequenceEqual(expected.ToArray(), c.numpy().ToArray())); + } + + [TestMethod] + public void testConcatNegativeAxis() + { + var t1 = tf.constant(new int[,] {{ 1, 2, 3 }, { 4, 5, 6 } }); + var t2 = tf.constant(new int[,] { { 7, 8, 9 }, { 10, 11, 12 } }); + var c = array_ops.concat(new[] { t1, t2 }, -2); + var expected = np.array(new int[,,] { { { 1, 2, 3 }, { 4, 5, 6 } }, { { 7, 8, 9 }, { 10, 11, 12 } } }); + Assert.IsTrue(Enumerable.SequenceEqual(expected.ToArray(), c.numpy().ToArray())); + + c = array_ops.concat(new[] { t1, t2 }, -1); + expected = np.array(new int[,] { { 1, 2, 3, 7, 8, 9 }, { 4, 5, 6, 10, 11, 12 } }); + Assert.IsTrue(Enumerable.SequenceEqual(expected.ToArray(), c.numpy().ToArray())); + } + + [TestMethod] + [DataRow(TF_DataType.TF_INT32)] + [DataRow(TF_DataType.TF_INT64)] + [DataRow(TF_DataType.TF_UINT32)] + [DataRow(TF_DataType.TF_UINT64)] + public void testConcatDtype(TF_DataType dtype) + { + var t1 = tf.constant(new int[,] { { 1, 2, 3 }, { 4, 5, 6 } }, dtype: dtype); + var t2 = tf.constant(new int[,] { { 7, 8, 9 }, { 10, 11, 12 } }, dtype: dtype); + var c = array_ops.concat(new[] { t1, t2 }, 1); + var expected = np.array(new int[,] { { 1, 2, 3, 7, 8, 9 }, { 4, 5, 6, 10, 11, 12 } }); + Assert.IsTrue(Enumerable.SequenceEqual(expected.ToArray(), tf.cast(c, TF_DataType.TF_INT32).numpy().ToArray())); + + } + + [TestMethod] + [DataRow(TF_DataType.TF_INT32)] + [DataRow(TF_DataType.TF_INT64)] + public void testConcatAxisType(TF_DataType dtype) + { + var t1 = tf.constant(new int[,] { { 1, 2, 3 }, {4, 5, 6 } }); + var t2 = tf.constant(new int[,] { { 7, 8, 9 }, { 10, 11, 12 } }); + var c = array_ops.concat(new[] { t1, t2 }, tf.constant(1, dtype: dtype)); + var expected = np.array(new int[,] { { 1, 2, 3, 7, 8, 9 }, { 4, 5, 6, 10, 11, 12 } }); + Assert.IsTrue(Enumerable.SequenceEqual(expected.ToArray(), tf.cast(c, TF_DataType.TF_INT32).numpy().ToArray())); + } + + } +} diff --git a/TensorFlow.NET.sln b/TensorFlow.NET.sln index 87729e27d..a246407b0 100644 --- a/TensorFlow.NET.sln +++ b/TensorFlow.NET.sln @@ -39,6 +39,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Tensorflow.Benchmark", "too EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Tensorflow.Console", "tools\TensorFlowNET.Console\Tensorflow.Console.csproj", "{1DC32255-BA1F-4D6D-A9C9-5BD5ED71CAA0}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TensorFlow.Kernel.UnitTest", "TensorFlow.Kernel.UnitTest\TensorFlow.Kernel.UnitTest.csproj", "{C08C6692-4818-46C1-8462-2F0CC40C9152}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -322,6 +324,24 @@ Global {1DC32255-BA1F-4D6D-A9C9-5BD5ED71CAA0}.Release|x64.Build.0 = Release|x64 {1DC32255-BA1F-4D6D-A9C9-5BD5ED71CAA0}.Release|x86.ActiveCfg = Release|Any CPU {1DC32255-BA1F-4D6D-A9C9-5BD5ED71CAA0}.Release|x86.Build.0 = Release|Any CPU + {C08C6692-4818-46C1-8462-2F0CC40C9152}.Debug|Any CPU.ActiveCfg 
= Debug|Any CPU + {C08C6692-4818-46C1-8462-2F0CC40C9152}.Debug|Any CPU.Build.0 = Debug|Any CPU + {C08C6692-4818-46C1-8462-2F0CC40C9152}.Debug|x64.ActiveCfg = Debug|Any CPU + {C08C6692-4818-46C1-8462-2F0CC40C9152}.Debug|x64.Build.0 = Debug|Any CPU + {C08C6692-4818-46C1-8462-2F0CC40C9152}.Debug|x86.ActiveCfg = Debug|Any CPU + {C08C6692-4818-46C1-8462-2F0CC40C9152}.Debug|x86.Build.0 = Debug|Any CPU + {C08C6692-4818-46C1-8462-2F0CC40C9152}.GPU|Any CPU.ActiveCfg = Debug|Any CPU + {C08C6692-4818-46C1-8462-2F0CC40C9152}.GPU|Any CPU.Build.0 = Debug|Any CPU + {C08C6692-4818-46C1-8462-2F0CC40C9152}.GPU|x64.ActiveCfg = Debug|Any CPU + {C08C6692-4818-46C1-8462-2F0CC40C9152}.GPU|x64.Build.0 = Debug|Any CPU + {C08C6692-4818-46C1-8462-2F0CC40C9152}.GPU|x86.ActiveCfg = Debug|Any CPU + {C08C6692-4818-46C1-8462-2F0CC40C9152}.GPU|x86.Build.0 = Debug|Any CPU + {C08C6692-4818-46C1-8462-2F0CC40C9152}.Release|Any CPU.ActiveCfg = Release|Any CPU + {C08C6692-4818-46C1-8462-2F0CC40C9152}.Release|Any CPU.Build.0 = Release|Any CPU + {C08C6692-4818-46C1-8462-2F0CC40C9152}.Release|x64.ActiveCfg = Release|Any CPU + {C08C6692-4818-46C1-8462-2F0CC40C9152}.Release|x64.Build.0 = Release|Any CPU + {C08C6692-4818-46C1-8462-2F0CC40C9152}.Release|x86.ActiveCfg = Release|Any CPU + {C08C6692-4818-46C1-8462-2F0CC40C9152}.Release|x86.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -342,6 +362,7 @@ Global {D24FCAA5-548C-4251-B226-A1B6535D0845} = {E1A5D2B7-10AF-4876-85C0-7714EF274214} {C23563DB-FE21-48E7-A411-87A109E4A899} = {E1A5D2B7-10AF-4876-85C0-7714EF274214} {1DC32255-BA1F-4D6D-A9C9-5BD5ED71CAA0} = {E1A5D2B7-10AF-4876-85C0-7714EF274214} + {C08C6692-4818-46C1-8462-2F0CC40C9152} = {1B0918B9-65AD-4F34-A287-AF4597B27DBD} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {2DEAD3CC-486B-4918-A607-50B0DE7B114A} From 8c06bbb0169f4c96c5c17bdd5fcbf07557665d03 Mon Sep 17 00:00:00 2001 From: Wanglongzhi2001 <583087864@qq.com> Date: Sun, 5 Nov 2023 20:47:58 +0800 Subject: [PATCH 43/77] fix: fix the bug caused by concat_v2 --- src/TensorFlowNET.Core/Operations/NnOps/rnn.cs | 4 ++-- src/TensorFlowNET.Core/Operations/array_ops.cs | 6 +++--- src/TensorFlowNET.Core/Operations/nn_ops.cs | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/TensorFlowNET.Core/Operations/NnOps/rnn.cs b/src/TensorFlowNET.Core/Operations/NnOps/rnn.cs index 55f139207..6b9f073c1 100644 --- a/src/TensorFlowNET.Core/Operations/NnOps/rnn.cs +++ b/src/TensorFlowNET.Core/Operations/NnOps/rnn.cs @@ -428,9 +428,9 @@ public static Tensor _transpose_batch_time(Tensor x) return x; var x_rank = array_ops.rank(x); - var con1 = new Tensor[] + var con1 = new object[] { - new Tensor(new int[]{0, 2}), + new []{1, 0 }, math_ops.range(2, x_rank) }; var x_t = array_ops.transpose(x, array_ops.concat(con1, 0)); diff --git a/src/TensorFlowNET.Core/Operations/array_ops.cs b/src/TensorFlowNET.Core/Operations/array_ops.cs index 1b424006d..548a885ed 100644 --- a/src/TensorFlowNET.Core/Operations/array_ops.cs +++ b/src/TensorFlowNET.Core/Operations/array_ops.cs @@ -945,12 +945,12 @@ public static Tensor broadcast_static_shape(Tensor shape_x, Tensor shape_y) /// public static Tensor concat(Tensor[] values, Tensor axis, string name = "concat") { - return gen_array_ops.concat_v2(values, axis, name: name); + return tf.Context.ExecuteOp("ConcatV2", name, new ExecuteOpArgs(values, axis)); } - public static Tensor concat(Tensor[] values, Axis axis, string name = "concat") + public 
static Tensor concat(object[] values, int axis, string name = "concat") { - return gen_array_ops.concat_v2(values, axis, name: name); + return tf.Context.ExecuteOp("ConcatV2", name, new ExecuteOpArgs(values, axis)); } /// diff --git a/src/TensorFlowNET.Core/Operations/nn_ops.cs b/src/TensorFlowNET.Core/Operations/nn_ops.cs index 394a591ab..00d7d316b 100644 --- a/src/TensorFlowNET.Core/Operations/nn_ops.cs +++ b/src/TensorFlowNET.Core/Operations/nn_ops.cs @@ -287,7 +287,7 @@ private static Tensor _flatten_outer_dims(Tensor logits) new[] { math_ops.subtract(rank, 1) }, new[] { constant_op.constant(1) }); - var ops = array_ops.concat(new Tensor[] { new Tensor(new int[] {1}), last_dim_size }, 0); + var ops = array_ops.concat(new[] { new[] { -1 }, (object)last_dim_size }, 0); var output = array_ops.reshape(logits, ops); // Set output shape if known. From 7fd455041d85dc4143a4a6e4d876b9c22be51f51 Mon Sep 17 00:00:00 2001 From: Wanglongzhi2001 <583087864@qq.com> Date: Sun, 5 Nov 2023 21:51:33 +0800 Subject: [PATCH 44/77] refactor: refacter the place of the kernel unittest folder --- TensorFlow.NET.sln | 40 +++++++++---------- .../TensorFlow.Kernel.UnitTest.csproj | 4 +- .../array_ops/concat_op_test.cs | 10 ++--- 3 files changed, 26 insertions(+), 28 deletions(-) rename {TensorFlow.Kernel.UnitTest => test/TensorFlow.Kernel.UnitTest}/TensorFlow.Kernel.UnitTest.csproj (74%) rename {TensorFlow.Kernel.UnitTest => test/TensorFlow.Kernel.UnitTest}/array_ops/concat_op_test.cs (89%) diff --git a/TensorFlow.NET.sln b/TensorFlow.NET.sln index a246407b0..214b039d4 100644 --- a/TensorFlow.NET.sln +++ b/TensorFlow.NET.sln @@ -39,7 +39,7 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Tensorflow.Benchmark", "too EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Tensorflow.Console", "tools\TensorFlowNET.Console\Tensorflow.Console.csproj", "{1DC32255-BA1F-4D6D-A9C9-5BD5ED71CAA0}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TensorFlow.Kernel.UnitTest", "TensorFlow.Kernel.UnitTest\TensorFlow.Kernel.UnitTest.csproj", "{C08C6692-4818-46C1-8462-2F0CC40C9152}" +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TensorFlow.Kernel.UnitTest", "test\TensorFlow.Kernel.UnitTest\TensorFlow.Kernel.UnitTest.csproj", "{654A027D-1364-4729-880B-144DFE1FF5BB}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -324,24 +324,24 @@ Global {1DC32255-BA1F-4D6D-A9C9-5BD5ED71CAA0}.Release|x64.Build.0 = Release|x64 {1DC32255-BA1F-4D6D-A9C9-5BD5ED71CAA0}.Release|x86.ActiveCfg = Release|Any CPU {1DC32255-BA1F-4D6D-A9C9-5BD5ED71CAA0}.Release|x86.Build.0 = Release|Any CPU - {C08C6692-4818-46C1-8462-2F0CC40C9152}.Debug|Any CPU.ActiveCfg = Debug|Any CPU - {C08C6692-4818-46C1-8462-2F0CC40C9152}.Debug|Any CPU.Build.0 = Debug|Any CPU - {C08C6692-4818-46C1-8462-2F0CC40C9152}.Debug|x64.ActiveCfg = Debug|Any CPU - {C08C6692-4818-46C1-8462-2F0CC40C9152}.Debug|x64.Build.0 = Debug|Any CPU - {C08C6692-4818-46C1-8462-2F0CC40C9152}.Debug|x86.ActiveCfg = Debug|Any CPU - {C08C6692-4818-46C1-8462-2F0CC40C9152}.Debug|x86.Build.0 = Debug|Any CPU - {C08C6692-4818-46C1-8462-2F0CC40C9152}.GPU|Any CPU.ActiveCfg = Debug|Any CPU - {C08C6692-4818-46C1-8462-2F0CC40C9152}.GPU|Any CPU.Build.0 = Debug|Any CPU - {C08C6692-4818-46C1-8462-2F0CC40C9152}.GPU|x64.ActiveCfg = Debug|Any CPU - {C08C6692-4818-46C1-8462-2F0CC40C9152}.GPU|x64.Build.0 = Debug|Any CPU - {C08C6692-4818-46C1-8462-2F0CC40C9152}.GPU|x86.ActiveCfg = Debug|Any CPU - {C08C6692-4818-46C1-8462-2F0CC40C9152}.GPU|x86.Build.0 = Debug|Any CPU 
- {C08C6692-4818-46C1-8462-2F0CC40C9152}.Release|Any CPU.ActiveCfg = Release|Any CPU - {C08C6692-4818-46C1-8462-2F0CC40C9152}.Release|Any CPU.Build.0 = Release|Any CPU - {C08C6692-4818-46C1-8462-2F0CC40C9152}.Release|x64.ActiveCfg = Release|Any CPU - {C08C6692-4818-46C1-8462-2F0CC40C9152}.Release|x64.Build.0 = Release|Any CPU - {C08C6692-4818-46C1-8462-2F0CC40C9152}.Release|x86.ActiveCfg = Release|Any CPU - {C08C6692-4818-46C1-8462-2F0CC40C9152}.Release|x86.Build.0 = Release|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.Debug|Any CPU.Build.0 = Debug|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.Debug|x64.ActiveCfg = Debug|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.Debug|x64.Build.0 = Debug|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.Debug|x86.ActiveCfg = Debug|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.Debug|x86.Build.0 = Debug|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.GPU|Any CPU.ActiveCfg = Debug|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.GPU|Any CPU.Build.0 = Debug|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.GPU|x64.ActiveCfg = Debug|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.GPU|x64.Build.0 = Debug|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.GPU|x86.ActiveCfg = Debug|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.GPU|x86.Build.0 = Debug|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.Release|Any CPU.ActiveCfg = Release|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.Release|Any CPU.Build.0 = Release|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.Release|x64.ActiveCfg = Release|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.Release|x64.Build.0 = Release|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.Release|x86.ActiveCfg = Release|Any CPU + {654A027D-1364-4729-880B-144DFE1FF5BB}.Release|x86.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -362,7 +362,7 @@ Global {D24FCAA5-548C-4251-B226-A1B6535D0845} = {E1A5D2B7-10AF-4876-85C0-7714EF274214} {C23563DB-FE21-48E7-A411-87A109E4A899} = {E1A5D2B7-10AF-4876-85C0-7714EF274214} {1DC32255-BA1F-4D6D-A9C9-5BD5ED71CAA0} = {E1A5D2B7-10AF-4876-85C0-7714EF274214} - {C08C6692-4818-46C1-8462-2F0CC40C9152} = {1B0918B9-65AD-4F34-A287-AF4597B27DBD} + {654A027D-1364-4729-880B-144DFE1FF5BB} = {1B0918B9-65AD-4F34-A287-AF4597B27DBD} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {2DEAD3CC-486B-4918-A607-50B0DE7B114A} diff --git a/TensorFlow.Kernel.UnitTest/TensorFlow.Kernel.UnitTest.csproj b/test/TensorFlow.Kernel.UnitTest/TensorFlow.Kernel.UnitTest.csproj similarity index 74% rename from TensorFlow.Kernel.UnitTest/TensorFlow.Kernel.UnitTest.csproj rename to test/TensorFlow.Kernel.UnitTest/TensorFlow.Kernel.UnitTest.csproj index a52a4cda6..68eb9e9b2 100644 --- a/TensorFlow.Kernel.UnitTest/TensorFlow.Kernel.UnitTest.csproj +++ b/test/TensorFlow.Kernel.UnitTest/TensorFlow.Kernel.UnitTest.csproj @@ -17,8 +17,8 @@ - - + + diff --git a/TensorFlow.Kernel.UnitTest/array_ops/concat_op_test.cs b/test/TensorFlow.Kernel.UnitTest/array_ops/concat_op_test.cs similarity index 89% rename from TensorFlow.Kernel.UnitTest/array_ops/concat_op_test.cs rename to test/TensorFlow.Kernel.UnitTest/array_ops/concat_op_test.cs index cfa8f0fbf..67d0aa602 100644 --- a/TensorFlow.Kernel.UnitTest/array_ops/concat_op_test.cs +++ b/test/TensorFlow.Kernel.UnitTest/array_ops/concat_op_test.cs @@ -1,9 +1,7 @@ using 
Microsoft.VisualStudio.TestTools.UnitTesting; using Tensorflow; using Tensorflow.NumPy; -using TensorFlow; using static Tensorflow.Binding; -using static Tensorflow.KerasApi; namespace TensorFlow.Kernel.UnitTest { @@ -23,14 +21,14 @@ public void testConcatEmpty() [TestMethod] public void testConcatNegativeAxis() { - var t1 = tf.constant(new int[,] {{ 1, 2, 3 }, { 4, 5, 6 } }); + var t1 = tf.constant(new int[,] { { 1, 2, 3 }, { 4, 5, 6 } }); var t2 = tf.constant(new int[,] { { 7, 8, 9 }, { 10, 11, 12 } }); var c = array_ops.concat(new[] { t1, t2 }, -2); var expected = np.array(new int[,,] { { { 1, 2, 3 }, { 4, 5, 6 } }, { { 7, 8, 9 }, { 10, 11, 12 } } }); Assert.IsTrue(Enumerable.SequenceEqual(expected.ToArray(), c.numpy().ToArray())); c = array_ops.concat(new[] { t1, t2 }, -1); - expected = np.array(new int[,] { { 1, 2, 3, 7, 8, 9 }, { 4, 5, 6, 10, 11, 12 } }); + expected = np.array(new int[,] { { 1, 2, 3, 7, 8, 9 }, { 4, 5, 6, 10, 11, 12 } }); Assert.IsTrue(Enumerable.SequenceEqual(expected.ToArray(), c.numpy().ToArray())); } @@ -54,7 +52,7 @@ public void testConcatDtype(TF_DataType dtype) [DataRow(TF_DataType.TF_INT64)] public void testConcatAxisType(TF_DataType dtype) { - var t1 = tf.constant(new int[,] { { 1, 2, 3 }, {4, 5, 6 } }); + var t1 = tf.constant(new int[,] { { 1, 2, 3 }, { 4, 5, 6 } }); var t2 = tf.constant(new int[,] { { 7, 8, 9 }, { 10, 11, 12 } }); var c = array_ops.concat(new[] { t1, t2 }, tf.constant(1, dtype: dtype)); var expected = np.array(new int[,] { { 1, 2, 3, 7, 8, 9 }, { 4, 5, 6, 10, 11, 12 } }); @@ -62,4 +60,4 @@ public void testConcatAxisType(TF_DataType dtype) } } -} +} \ No newline at end of file From 7f0161445d1142f18ca2e18504e25fcad15e1d44 Mon Sep 17 00:00:00 2001 From: Wanglongzhi2001 <583087864@qq.com> Date: Sun, 5 Nov 2023 21:54:56 +0800 Subject: [PATCH 45/77] fix: fix a project reference mistake --- .../TensorFlow.Kernel.UnitTest.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/TensorFlow.Kernel.UnitTest/TensorFlow.Kernel.UnitTest.csproj b/test/TensorFlow.Kernel.UnitTest/TensorFlow.Kernel.UnitTest.csproj index 68eb9e9b2..21b2731b7 100644 --- a/test/TensorFlow.Kernel.UnitTest/TensorFlow.Kernel.UnitTest.csproj +++ b/test/TensorFlow.Kernel.UnitTest/TensorFlow.Kernel.UnitTest.csproj @@ -17,8 +17,8 @@ + - From 94c0bb8796a06a4becb21687141f2a4451c9230e Mon Sep 17 00:00:00 2001 From: Haiping Chen Date: Sun, 5 Nov 2023 15:02:16 -0600 Subject: [PATCH 46/77] Release v0.150.0 based on tensorflowv v2.15.0. --- README.md | 19 ++++--------------- .../APIs/c_api.customize.cs | 6 +++--- .../Operations/Operation.cs | 2 +- .../Operations/handle_data_util.cs | 2 +- .../Tensorflow.Binding.csproj | 14 +++++++++----- src/TensorFlowNET.Core/ops.cs | 2 +- .../Tensorflow.Keras.csproj | 9 +++++---- src/TensorflowNET.Hub/Tensorflow.Hub.csproj | 2 +- .../Tensorflow.Console.csproj | 5 +---- .../Tensorflow.CodeGen.csproj | 1 - .../Tensorflow.UnitTest.RedistHolder.csproj | 2 +- 11 files changed, 27 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index 36ec1660c..0198c873c 100644 --- a/README.md +++ b/README.md @@ -15,20 +15,6 @@ English | [中文](docs/README-CN.md) -**=========================================================** - -### [Voting: Naming Convention Approach of v1.0.0](https://github.com/SciSharp/TensorFlow.NET/issues/1074) - -Dear all, - -We would like to urge you to participate in our upcoming vote regarding the naming convention for TensorFlow.NET version 1.0.0 in [#1074](https://github.com/SciSharp/TensorFlow.NET/issues/1074). 
Your participation in the vote is essential to help us decide on the best approach for improving the naming convention used in previous versions. - -Thank you, - -TensorFlow.NET Authors - -**=========================================================** - *master branch and v0.100.x is corresponding to tensorflow v2.10, v0.6x branch is from tensorflow v2.6, v0.15-tensorflow1.15 is from tensorflow1.15. Please add `https://www.myget.org/F/scisharp/api/v3/index.json` to nuget source to use nightly release.* @@ -75,9 +61,12 @@ PM> Install-Package TensorFlow.Keras The second part is the computing support part. Only one of the following packages is needed, depending on your device and system. ``` -### CPU version for Windows, Linux and Mac +### CPU version for Windows and Linux PM> Install-Package SciSharp.TensorFlow.Redist +### CPU version for MacOS +PM> Install-Package SciSharp.TensorFlow.Redist-OSX + ### GPU version for Windows (CUDA and cuDNN are required) PM> Install-Package SciSharp.TensorFlow.Redist-Windows-GPU diff --git a/src/TensorFlowNET.Core/APIs/c_api.customize.cs b/src/TensorFlowNET.Core/APIs/c_api.customize.cs index 510e52eb7..bee4897ee 100644 --- a/src/TensorFlowNET.Core/APIs/c_api.customize.cs +++ b/src/TensorFlowNET.Core/APIs/c_api.customize.cs @@ -8,10 +8,10 @@ namespace Tensorflow public partial class c_api { [DllImport(TensorFlowLibName)] - public static extern void TFC_SetAttr(SafeGraphHandle graph, IntPtr op, string attr_name, SafeBufferHandle attr_value_proto, SafeStatusHandle status); + public static extern void TF_SetAttr(SafeGraphHandle graph, IntPtr op, string attr_name, SafeBufferHandle attr_value_proto, SafeStatusHandle status); [DllImport(TensorFlowLibName)] - public static extern SafeBufferHandle TFC_GetHandleShapeAndType(SafeGraphHandle c_graph, TF_Output output); + public static extern SafeBufferHandle TF_GetHandleShapeAndType(SafeGraphHandle c_graph, TF_Output output); [DllImport(TensorFlowLibName)] - public static extern void TFC_SetHandleShapeAndType(SafeGraphHandle c_graph, TF_Output output, byte[] data, long proto_len, SafeStatusHandle status); + public static extern void TF_SetHandleShapeAndType(SafeGraphHandle c_graph, TF_Output output, byte[] data, long proto_len, SafeStatusHandle status); } } diff --git a/src/TensorFlowNET.Core/Operations/Operation.cs b/src/TensorFlowNET.Core/Operations/Operation.cs index e59c381cb..2105c53fa 100644 --- a/src/TensorFlowNET.Core/Operations/Operation.cs +++ b/src/TensorFlowNET.Core/Operations/Operation.cs @@ -437,7 +437,7 @@ internal void _set_attr(string attr_name, AttrValue attr_value) internal void _set_attr_with_buf(string attr_name, Buffer attr_buf) { Status status = new(); - c_api.TFC_SetAttr(graph, _handle, attr_name, attr_buf, status); + c_api.TF_SetAttr(graph, _handle, attr_name, attr_buf, status); status.Check(true); } } diff --git a/src/TensorFlowNET.Core/Operations/handle_data_util.cs b/src/TensorFlowNET.Core/Operations/handle_data_util.cs index a01efc520..363d3144e 100644 --- a/src/TensorFlowNET.Core/Operations/handle_data_util.cs +++ b/src/TensorFlowNET.Core/Operations/handle_data_util.cs @@ -51,7 +51,7 @@ public static void set_handle_data(Tensor target_t, HandleData handle_data) } Status status = new(); var proto = handle_data.ToByteArray(); - c_api.TFC_SetHandleShapeAndType(target_t.graph.c_graph, target_t._as_tf_output(), proto, proto.Length, status); + c_api.TF_SetHandleShapeAndType(target_t.graph.c_graph, target_t._as_tf_output(), proto, proto.Length, status); status.Check(true); } diff --git 
a/src/TensorFlowNET.Core/Tensorflow.Binding.csproj b/src/TensorFlowNET.Core/Tensorflow.Binding.csproj index 85c41bd2a..42c0399da 100644 --- a/src/TensorFlowNET.Core/Tensorflow.Binding.csproj +++ b/src/TensorFlowNET.Core/Tensorflow.Binding.csproj @@ -4,8 +4,8 @@ netstandard2.0;net6.0 Tensorflow.Binding Tensorflow - 2.11.0 - 0.110.4 + 2.15.0 + 0.150.0 10.0 enable Haiping Chen, Eli Belash, Yaohui Liu, Meinrad Recheis @@ -20,8 +20,11 @@ Google's TensorFlow full binding in .NET Standard. Building, training and infering deep learning models. https://tensorflownet.readthedocs.io - 0.110.3.0 + 0.150.0.0 + tf.net 0.150.x and above are based on tensorflow native 2.15.0 + * Support BERT model. + tf.net 0.110.x and above are based on tensorflow native 2.11.0 * Support RNN, LSTM model. * Support Transformer model. @@ -43,8 +46,9 @@ https://tensorflownet.readthedocs.io tf.net 0.7x.x aligns with TensorFlow v2.7.x native library. tf.net 0.10x.x aligns with TensorFlow v2.10.x native library. tf.net 0.11x.x aligns with TensorFlow v2.11.x native library. + tf.net 0.15x.x aligns with TensorFlow v2.15.x native library. - 0.110.4.0 + 0.150.0.0 LICENSE true packages @@ -176,7 +180,7 @@ https://tensorflownet.readthedocs.io - + diff --git a/src/TensorFlowNET.Core/ops.cs b/src/TensorFlowNET.Core/ops.cs index 351fd18ff..6f51150a2 100644 --- a/src/TensorFlowNET.Core/ops.cs +++ b/src/TensorFlowNET.Core/ops.cs @@ -590,7 +590,7 @@ public static bool inside_function() public static HandleData get_resource_handle_data(Tensor graph_op) { - var handle_data = c_api.TFC_GetHandleShapeAndType(graph_op.graph.c_graph, graph_op._as_tf_output()); + var handle_data = c_api.TF_GetHandleShapeAndType(graph_op.graph.c_graph, graph_op._as_tf_output()); try{ var handle_str = c_api.ByteStringPiece(handle_data.DangerousGetHandle() == IntPtr.Zero ? null : new Buffer(handle_data)); return HandleData.Parser.ParseFrom(handle_str); diff --git a/src/TensorFlowNET.Keras/Tensorflow.Keras.csproj b/src/TensorFlowNET.Keras/Tensorflow.Keras.csproj index a0ee22284..eb8ebf93c 100644 --- a/src/TensorFlowNET.Keras/Tensorflow.Keras.csproj +++ b/src/TensorFlowNET.Keras/Tensorflow.Keras.csproj @@ -7,7 +7,7 @@ enable Tensorflow.Keras AnyCPU;x64 - 0.11.4 + 0.15.0 Haiping Chen Keras for .NET Apache 2.0, Haiping Chen since 2018 @@ -30,6 +30,7 @@ * Fixed memory leak for YOLOv3 model. * Support RNN and LSTM models * Support Transformer model + * Support BERT model Keras for .NET @@ -42,8 +43,8 @@ Keras is an API designed for human beings, not machines. Keras follows best prac Git False Open.snk - 0.11.4.0 - 0.11.4.0 + 0.15.0.0 + 0.15.0.0 LICENSE Debug;Release;GPU @@ -143,7 +144,7 @@ Keras is an API designed for human beings, not machines. 
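
A note on the ops.cs hunk above: it is the consuming side of the renamed native entry points. TF.NET asks the C library for a tensor's resource-handle metadata and parses the returned buffer as a HandleData protobuf. A condensed sketch of that call path, using only names that appear in this patch (null and error handling trimmed, so treat it as illustrative rather than the exact implementation):

```csharp
// Sketch: reading resource-handle shape/type data through the renamed TF_ symbol.
// Mirrors ops.get_resource_handle_data; Buffer wraps the native byte buffer.
var buf = c_api.TF_GetHandleShapeAndType(graph_op.graph.c_graph, graph_op._as_tf_output());
var handle_data = HandleData.Parser.ParseFrom(c_api.ByteStringPiece(new Buffer(buf)));
```
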
Keras follows best prac - + diff --git a/src/TensorflowNET.Hub/Tensorflow.Hub.csproj b/src/TensorflowNET.Hub/Tensorflow.Hub.csproj index 3c09f808e..efa37598d 100644 --- a/src/TensorflowNET.Hub/Tensorflow.Hub.csproj +++ b/src/TensorflowNET.Hub/Tensorflow.Hub.csproj @@ -26,7 +26,7 @@ - + diff --git a/tools/TensorFlowNET.Console/Tensorflow.Console.csproj b/tools/TensorFlowNET.Console/Tensorflow.Console.csproj index ecc2d30b5..bb60b6b63 100644 --- a/tools/TensorFlowNET.Console/Tensorflow.Console.csproj +++ b/tools/TensorFlowNET.Console/Tensorflow.Console.csproj @@ -19,13 +19,10 @@ AnyCPU - - - - + diff --git a/tools/Tensorflow.CodeGen/Tensorflow.CodeGen.csproj b/tools/Tensorflow.CodeGen/Tensorflow.CodeGen.csproj index 03195e6ac..2afc68a3c 100644 --- a/tools/Tensorflow.CodeGen/Tensorflow.CodeGen.csproj +++ b/tools/Tensorflow.CodeGen/Tensorflow.CodeGen.csproj @@ -9,7 +9,6 @@ - diff --git a/tools/Tensorflow.UnitTest.RedistHolder/Tensorflow.UnitTest.RedistHolder.csproj b/tools/Tensorflow.UnitTest.RedistHolder/Tensorflow.UnitTest.RedistHolder.csproj index 1ca387dbb..0d1018cab 100644 --- a/tools/Tensorflow.UnitTest.RedistHolder/Tensorflow.UnitTest.RedistHolder.csproj +++ b/tools/Tensorflow.UnitTest.RedistHolder/Tensorflow.UnitTest.RedistHolder.csproj @@ -5,7 +5,7 @@ - + From 53bd70bed3828a81e83bc1a2edbe1b3cbfab197a Mon Sep 17 00:00:00 2001 From: Wanglongzhi2001 <583087864@qq.com> Date: Tue, 7 Nov 2023 22:54:08 +0800 Subject: [PATCH 47/77] fix: fix the validation_pack when multiple input --- src/TensorFlowNET.Core/Util/Data.cs | 26 ++++++++++++++----- .../Engine/DataAdapters/DataAdapter.cs | 14 +++++++--- .../Engine/Model.Evaluate.cs | 8 +++++- src/TensorFlowNET.Keras/Engine/Model.Fit.cs | 23 +++++++++++++--- 4 files changed, 56 insertions(+), 15 deletions(-) diff --git a/src/TensorFlowNET.Core/Util/Data.cs b/src/TensorFlowNET.Core/Util/Data.cs index a14c69b18..4e5a65434 100644 --- a/src/TensorFlowNET.Core/Util/Data.cs +++ b/src/TensorFlowNET.Core/Util/Data.cs @@ -1,4 +1,5 @@ -using Tensorflow.NumPy; +using OneOf; +using Tensorflow.NumPy; namespace Tensorflow.Util { @@ -8,10 +9,10 @@ namespace Tensorflow.Util /// public class ValidationDataPack { - public NDArray val_x; + public OneOf val_x; public NDArray val_y; public NDArray val_sample_weight = null; - + public bool val_x_is_array = false; public ValidationDataPack((NDArray, NDArray) validation_data) { this.val_x = validation_data.Item1; @@ -27,15 +28,17 @@ public ValidationDataPack((NDArray, NDArray, NDArray) validation_data) public ValidationDataPack((IEnumerable, NDArray) validation_data) { - this.val_x = validation_data.Item1.ToArray()[0]; + this.val_x = validation_data.Item1.ToArray(); this.val_y = validation_data.Item2; + val_x_is_array = true; } public ValidationDataPack((IEnumerable, NDArray, NDArray) validation_data) { - this.val_x = validation_data.Item1.ToArray()[0]; + this.val_x = validation_data.Item1.ToArray(); this.val_y = validation_data.Item2; this.val_sample_weight = validation_data.Item3; + val_x_is_array = true; } public static implicit operator ValidationDataPack((NDArray, NDArray) validation_data) @@ -52,15 +55,24 @@ public static implicit operator ValidationDataPack((IEnumerable, NDArra public void Deconstruct(out NDArray val_x, out NDArray val_y) { - val_x = this.val_x; + val_x = this.val_x.AsT0; val_y = this.val_y; } public void Deconstruct(out NDArray val_x, out NDArray val_y, out NDArray val_sample_weight) { - val_x = this.val_x; + val_x = this.val_x.AsT0; + val_y = this.val_y; + val_sample_weight = 
this.val_sample_weight; + } + + // add an unused parameter to make it different from Deconstruct(out NDArray val_x, out NDArray val_y, out NDArray val_sample_weight) + public void Deconstruct(out NDArray[] val_x_array, out NDArray val_y, out NDArray val_sample_weight, out NDArray unuse) + { + val_x_array = this.val_x.AsT1; val_y = this.val_y; val_sample_weight = this.val_sample_weight; + unuse = null; + } } }
diff --git a/src/TensorFlowNET.Keras/Engine/DataAdapters/DataAdapter.cs b/src/TensorFlowNET.Keras/Engine/DataAdapters/DataAdapter.cs
index b2750496a..590f30a78 100644
--- a/src/TensorFlowNET.Keras/Engine/DataAdapters/DataAdapter.cs
+++ b/src/TensorFlowNET.Keras/Engine/DataAdapters/DataAdapter.cs
@@ -92,9 +92,17 @@ public static ((IEnumerable<NDArray>, NDArray, NDArray), ValidationDataPack) tra
 var train_y = y[new Slice(0, train_count)];
 var val_x = x.Select(x => x[new Slice(train_count)] as NDArray);
 var val_y = y[new Slice(train_count)];
- NDArray tmp_sample_weight = sample_weight;
- sample_weight = sample_weight[new Slice(0, train_count)];
- ValidationDataPack validation_data = (val_x, val_y, tmp_sample_weight[new Slice(train_count)]);
+
+ ValidationDataPack validation_data;
+ if (sample_weight != null)
+ {
+ validation_data = (val_x, val_y, sample_weight[new Slice(train_count)]);
+ sample_weight = sample_weight[new Slice(0, train_count)];
+ }
+ else
+ {
+ validation_data = (val_x, val_y);
+ }
 return ((train_x, train_y, sample_weight), validation_data); } }
diff --git a/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs b/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs
index 474d5e5a5..b3264429e 100644
--- a/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs
+++ b/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs
@@ -70,13 +70,19 @@ public Dictionary<string, float> evaluate(NDArray x, NDArray y,
 return evaluate(data_handler, callbacks, is_val, test_function); }
- public Dictionary<string, float> evaluate(IEnumerable<Tensor> x, Tensor y, int verbose = 1, bool is_val = false)
+ public Dictionary<string, float> evaluate(
+ IEnumerable<Tensor> x,
+ Tensor y,
+ int verbose = 1,
+ NDArray sample_weight = null,
+ bool is_val = false)
 { var data_handler = new DataHandler(new DataHandlerArgs { X = new Tensors(x.ToArray()), Y = y, Model = this,
+ SampleWeight = sample_weight,
 StepsPerExecution = _steps_per_execution });
diff --git a/src/TensorFlowNET.Keras/Engine/Model.Fit.cs b/src/TensorFlowNET.Keras/Engine/Model.Fit.cs
index d61211c71..13a1b63bc 100644
--- a/src/TensorFlowNET.Keras/Engine/Model.Fit.cs
+++ b/src/TensorFlowNET.Keras/Engine/Model.Fit.cs
@@ -7,6 +7,7 @@ using System.Diagnostics;
 using Tensorflow.Keras.Callbacks;
 using Tensorflow.Util;
+using OneOf;
 namespace Tensorflow.Keras.Engine { @@ -287,10 +288,24 @@ History FitInternal(DataHandler data_handler, int epochs, int verbose, List<ICallback> callbacks)
 if (validation_data != null) {
- var (val_x, val_y, val_sample_weight) = validation_data;
- // Because evaluate calls call_test_batch_end, this interferes with our output on the screen
- // so we need to pass a is_val parameter to stop on_test_batch_end
- var val_logs = evaluate(val_x, val_y, sample_weight: val_sample_weight, is_val: true);
+ NDArray val_x;
+ NDArray[] val_x_array;
+ NDArray val_y;
+ NDArray val_sample_weight;
+ Dictionary<string, float> val_logs;
+ if (!validation_data.val_x_is_array)
+ {
+ (val_x, val_y, val_sample_weight) = validation_data;
+ // Because evaluate calls call_test_batch_end, this interferes with our output on the screen
+ // so we need to pass a is_val parameter to stop on_test_batch_end
+ val_logs = evaluate(val_x, val_y, sample_weight: val_sample_weight, is_val: true);
+
+ }
+ else
+ {
+ (val_x_array, val_y, val_sample_weight, _) = validation_data;
+ val_logs = evaluate(val_x_array, val_y, sample_weight: val_sample_weight, is_val: true);
+ }
 foreach (var log in val_logs) { logs["val_" + log.Key] = log.Value;
From d453fb6611f4acb3ab405579ae804279d6e07cbe Mon Sep 17 00:00:00 2001
From: Wanglongzhi2001 <583087864@qq.com>
Date: Tue, 7 Nov 2023 23:34:37 +0800
Subject: [PATCH 48/77] refactor:
declare some fields of ValidationDataPack as internal

---
 src/TensorFlowNET.Core/Util/Data.cs | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/TensorFlowNET.Core/Util/Data.cs b/src/TensorFlowNET.Core/Util/Data.cs
index 4e5a65434..388efc50f 100644
--- a/src/TensorFlowNET.Core/Util/Data.cs
+++ b/src/TensorFlowNET.Core/Util/Data.cs
@@ -9,9 +9,9 @@ namespace Tensorflow.Util
 ///
 public class ValidationDataPack {
- public OneOf<NDArray, NDArray[]> val_x;
- public NDArray val_y;
- public NDArray val_sample_weight = null;
+ internal OneOf<NDArray, NDArray[]> val_x;
+ internal NDArray val_y;
+ internal NDArray val_sample_weight = null;
 public bool val_x_is_array = false;
 public ValidationDataPack((NDArray, NDArray) validation_data) {
@@ -33,7 +33,7 @@ public ValidationDataPack((IEnumerable<NDArray>, NDArray) validation_data)
 val_x_is_array = true; }
- public ValidationDataPack((IEnumerable<NDArray>, NDArray, NDArray) validation_data)
+ internal ValidationDataPack((IEnumerable<NDArray>, NDArray, NDArray) validation_data)
 { this.val_x = validation_data.Item1.ToArray(); this.val_y = validation_data.Item2;
From 47e9019a187744bf31e315525ffe352dad36a00c Mon Sep 17 00:00:00 2001
From: Wanglongzhi2001 <583087864@qq.com>
Date: Tue, 7 Nov 2023 23:36:15 +0800
Subject: [PATCH 49/77] refactor: fix a typo

---
 src/TensorFlowNET.Core/Util/Data.cs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/TensorFlowNET.Core/Util/Data.cs b/src/TensorFlowNET.Core/Util/Data.cs
index 388efc50f..fe3466ed0 100644
--- a/src/TensorFlowNET.Core/Util/Data.cs
+++ b/src/TensorFlowNET.Core/Util/Data.cs
@@ -33,7 +33,7 @@ public ValidationDataPack((IEnumerable<NDArray>, NDArray) validation_data)
 val_x_is_array = true; }
- internal ValidationDataPack((IEnumerable<NDArray>, NDArray, NDArray) validation_data)
+ public ValidationDataPack((IEnumerable<NDArray>, NDArray, NDArray) validation_data)
 { this.val_x = validation_data.Item1.ToArray(); this.val_y = validation_data.Item2;
From 2a377e2f91b40083f5de86f01b57b32bad5a5932 Mon Sep 17 00:00:00 2001
From: Alexander Novikov
Date: Tue, 7 Nov 2023 19:23:34 +0000
Subject: [PATCH 50/77] tests are passing

---
 .../Variables/variables.py.cs | 8 ----
 test/TensorFlowNET.UnitTest/PythonTest.cs | 40 ++++++++++++-------
 .../Training/GradientDescentOptimizerTests.cs | 33 +++++++++------
 3 files changed, 46 insertions(+), 35 deletions(-)

diff --git a/src/TensorFlowNET.Core/Variables/variables.py.cs b/src/TensorFlowNET.Core/Variables/variables.py.cs
index f3ae248e6..91f57e292 100644
--- a/src/TensorFlowNET.Core/Variables/variables.py.cs
+++ b/src/TensorFlowNET.Core/Variables/variables.py.cs
@@ -154,13 +154,5 @@ public static Operation _safe_initial_value_from_op(string name, Operation op, D
 return op; }
-
- public static Tensor global_variables_initializer()
- {
- // if context.executing_eagerly():
- // return control_flow_ops.no_op(name = "global_variables_initializer")
- var group = variables_initializer(global_variables().ToArray());
- return group;
- }
 } }
diff --git a/test/TensorFlowNET.UnitTest/PythonTest.cs b/test/TensorFlowNET.UnitTest/PythonTest.cs
index 12fd72360..090ef097c 100644
--- a/test/TensorFlowNET.UnitTest/PythonTest.cs
+++ b/test/TensorFlowNET.UnitTest/PythonTest.cs
@@ -6,6 +6,7 @@ using System.Linq;
 using Tensorflow;
 using static Tensorflow.Binding;
+using System.Collections.Generic;
 namespace TensorFlowNET.UnitTest { @@ -144,11 +145,12 @@ public void assertAllClose(double value, NDArray array2, double eps = 1e-5)
 Assert.IsTrue(np.allclose(array1, array2, rtol: eps)); }
- private class CollectionComparer : System.Collections.IComparer
+
private class CollectionComparer : IComparer { private readonly double _epsilon; - public CollectionComparer(double eps = 1e-06) { + public CollectionComparer(double eps = 1e-06) + { _epsilon = eps; } public int Compare(object x, object y) @@ -166,13 +168,15 @@ public int Compare(object x, object y) } public void assertAllCloseAccordingToType( - T[] expected, - T[] given, + ICollection expected, + ICollection given, double eps = 1e-6, float float_eps = 1e-6f) { // TODO: check if any of arguments is not double and change toletance - CollectionAssert.AreEqual(expected, given, new CollectionComparer(eps)); + // remove givenAsDouble and cast expected instead + var givenAsDouble = given.Select(x => Convert.ToDouble(x)).ToArray(); + CollectionAssert.AreEqual(expected, givenAsDouble, new CollectionComparer(eps)); } public void assertProtoEquals(object toProto, object o) @@ -241,17 +245,25 @@ public T evaluate(Tensor tensor) // return self._eval_helper(tensors) // else: { - var sess = tf.Session(); + var sess = tf.get_default_session(); var ndarray = tensor.eval(sess); - if (typeof(T) == typeof(double)) + if (typeof(T) == typeof(double) + || typeof(T) == typeof(float) + || typeof(T) == typeof(int)) + { + result = Convert.ChangeType(ndarray, typeof(T)); + } + else if (typeof(T) == typeof(double[])) + { + result = ndarray.ToMultiDimArray(); + } + else if (typeof(T) == typeof(float[])) { - double x = ndarray; - result = x; + result = ndarray.ToMultiDimArray(); } - else if (typeof(T) == typeof(int)) + else if (typeof(T) == typeof(int[])) { - int x = ndarray; - result = x; + result = ndarray.ToMultiDimArray(); } else { @@ -457,12 +469,12 @@ private Session _get_cached_session( else { - if (crash_if_inconsistent_args && !self._cached_graph.Equals(graph)) + if (crash_if_inconsistent_args && self._cached_graph != null && !self._cached_graph.Equals(graph)) throw new ValueError(@"The graph used to get the cached session is different than the one that was used to create the session. 
Maybe create a new session with self.session()"); - if (crash_if_inconsistent_args && !self._cached_config.Equals(config)) + if (crash_if_inconsistent_args && self._cached_config != null && !self._cached_config.Equals(config)) { throw new ValueError(@"The config used to get the cached session is different than the one that was used to create the diff --git a/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs b/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs index 977544ae9..3059068f4 100644 --- a/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs +++ b/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs @@ -1,8 +1,6 @@ using Microsoft.VisualStudio.TestTools.UnitTesting; using System; using System.Linq; -using System.Runtime.Intrinsics.X86; -using System.Security.AccessControl; using Tensorflow.NumPy; using TensorFlowNET.UnitTest; using static Tensorflow.Binding; @@ -12,18 +10,23 @@ namespace Tensorflow.Keras.UnitTest.Optimizers [TestClass] public class GradientDescentOptimizerTest : PythonTest { - private void TestBasicGeneric() where T : struct + private static TF_DataType GetTypeForNumericType() where T : struct { - var dtype = Type.GetTypeCode(typeof(T)) switch + return Type.GetTypeCode(typeof(T)) switch { TypeCode.Single => np.float32, TypeCode.Double => np.float64, _ => throw new NotImplementedException(), }; + } + + private void TestBasicGeneric() where T : struct + { + var dtype = GetTypeForNumericType(); // train.GradientDescentOptimizer is V1 only API. tf.Graph().as_default(); - using (self.cached_session()) + using (var sess = self.cached_session()) { var var0 = tf.Variable(new[] { 1.0, 2.0 }, dtype: dtype); var var1 = tf.Variable(new[] { 3.0, 4.0 }, dtype: dtype); @@ -36,21 +39,25 @@ private void TestBasicGeneric() where T : struct }; var sgd_op = optimizer.apply_gradients(grads_and_vars); - var global_variables = variables.global_variables_initializer(); - self.evaluate(global_variables); + var global_variables = tf.global_variables_initializer(); + sess.run(global_variables); + // Fetch params to validate initial values + var initialVar0 = sess.run(var0); + var valu = var0.eval(sess); + var initialVar1 = sess.run(var1); // TODO: use self.evaluate instead of self.evaluate - self.assertAllCloseAccordingToType(new double[] { 1.0, 2.0 }, self.evaluate(var0)); - self.assertAllCloseAccordingToType(new double[] { 3.0, 4.0 }, self.evaluate(var1)); + self.assertAllCloseAccordingToType(new[] { 1.0, 2.0 }, self.evaluate(var0)); + self.assertAllCloseAccordingToType(new[] { 3.0, 4.0 }, self.evaluate(var1)); // Run 1 step of sgd sgd_op.run(); // Validate updated params self.assertAllCloseAccordingToType( - new double[] { 1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1 }, - self.evaluate(var0)); + new[] { 1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1 }, + self.evaluate(var0)); self.assertAllCloseAccordingToType( - new double[] { 3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01 }, - self.evaluate(var1)); + new[] { 3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01 }, + self.evaluate(var1)); // TODO: self.assertEqual(0, len(optimizer.variables())); } } From f7b8dba00b2465114926072d4a82924dc35596d7 Mon Sep 17 00:00:00 2001 From: Alexander Date: Wed, 8 Nov 2023 15:16:02 +0000 Subject: [PATCH 51/77] small fixes --- .../Training/GradientDescentOptimizerTests.cs | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs 
b/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs index 3059068f4..1a650a864 100644 --- a/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs +++ b/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs @@ -1,4 +1,5 @@ -using Microsoft.VisualStudio.TestTools.UnitTesting; +using Microsoft.VisualStudio.TestPlatform.Utilities; +using Microsoft.VisualStudio.TestTools.UnitTesting; using System; using System.Linq; using Tensorflow.NumPy; @@ -20,7 +21,7 @@ private static TF_DataType GetTypeForNumericType() where T : struct }; } - private void TestBasicGeneric() where T : struct + private void TestBasic() where T : struct { var dtype = GetTypeForNumericType(); @@ -42,11 +43,9 @@ private void TestBasicGeneric() where T : struct var global_variables = tf.global_variables_initializer(); sess.run(global_variables); - // Fetch params to validate initial values var initialVar0 = sess.run(var0); - var valu = var0.eval(sess); var initialVar1 = sess.run(var1); - // TODO: use self.evaluate instead of self.evaluate + // Fetch params to validate initial values self.assertAllCloseAccordingToType(new[] { 1.0, 2.0 }, self.evaluate(var0)); self.assertAllCloseAccordingToType(new[] { 3.0, 4.0 }, self.evaluate(var1)); // Run 1 step of sgd @@ -66,10 +65,9 @@ private void TestBasicGeneric() where T : struct public void TestBasic() { //TODO: add np.half - TestBasicGeneric(); - TestBasicGeneric(); + TestBasic(); + TestBasic(); } - } } From c906f46aadaf2e2f0d1769f026270ba912ef95be Mon Sep 17 00:00:00 2001 From: Alexander Date: Wed, 8 Nov 2023 15:24:13 +0000 Subject: [PATCH 52/77] learning rate test --- .../Training/GradientDescentOptimizerTests.cs | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs b/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs index 1a650a864..92fe97706 100644 --- a/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs +++ b/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs @@ -1,6 +1,7 @@ using Microsoft.VisualStudio.TestPlatform.Utilities; using Microsoft.VisualStudio.TestTools.UnitTesting; using System; +using System.Diagnostics; using System.Linq; using Tensorflow.NumPy; using TensorFlowNET.UnitTest; @@ -69,5 +70,53 @@ public void TestBasic() TestBasic(); } + private void TestTensorLearningRate() where T : struct + { + var dtype = GetTypeForNumericType(); + + // train.GradientDescentOptimizer is V1 only API. 
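
The comment above is the key constraint in these tests: train.GradientDescentOptimizer is a TF1-style optimizer, so each test must pin a default Graph and run under an explicit Session instead of eager mode. A minimal sketch of the scaffolding, assembled from the surrounding hunks and assuming a PythonTest-derived test class (illustrative, not part of the patch):

```csharp
// Sketch: graph-mode setup for a V1 optimizer test.
tf.Graph().as_default();                 // train.* APIs require graph mode
using var sess = self.cached_session();  // one session reused per test
var var0 = tf.Variable(new[] { 1.0, 2.0 }, dtype: np.float64);
var grads0 = tf.constant(new[] { 0.1, 0.1 }, dtype: np.float64);
var sgd_op = tf.train.GradientDescentOptimizer(3.0f)
    .apply_gradients(new[] { Tuple.Create(grads0, var0 as IVariableV1) });
sess.run(tf.global_variables_initializer());
sgd_op.run();                            // var0 becomes [1.0 - 0.3, 2.0 - 0.3]
```
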
+ tf.Graph().as_default(); + using (var sess = self.cached_session()) + { + var var0 = tf.Variable(new[] { 1.0, 2.0 }, dtype: dtype); + var var1 = tf.Variable(new[] { 3.0, 4.0 }, dtype: dtype); + var grads0 = tf.constant(new[] { 0.1, 0.1 }, dtype: dtype); + var grads1 = tf.constant(new[] { 0.01, 0.01 }, dtype: dtype); + var lrate = constant_op.constant(3.0); + var grads_and_vars = new[] { + Tuple.Create(grads0, var0 as IVariableV1), + Tuple.Create(grads1, var1 as IVariableV1) + }; + var sgd_op = tf.train.GradientDescentOptimizer(lrate) + .apply_gradients(grads_and_vars); + + var global_variables = tf.global_variables_initializer(); + sess.run(global_variables); + + var initialVar0 = sess.run(var0); + var initialVar1 = sess.run(var1); + // Fetch params to validate initial values + self.assertAllCloseAccordingToType(new[] { 1.0, 2.0 }, self.evaluate(var0)); + self.assertAllCloseAccordingToType(new[] { 3.0, 4.0 }, self.evaluate(var1)); + // Run 1 step of sgd + sgd_op.run(); + // Validate updated params + self.assertAllCloseAccordingToType( + new[] { 1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1 }, + self.evaluate(var0)); + self.assertAllCloseAccordingToType( + new[] { 3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01 }, + self.evaluate(var1)); + // TODO: self.assertEqual(0, len(optimizer.variables())); + } + } + + [TestMethod] + public void TestTensorLearningRate() + { + //TODO: add np.half + TestTensorLearningRate(); + TestTensorLearningRate(); + } } } From 149caaec11b649e6f9e85320a1f18689c32cae6c Mon Sep 17 00:00:00 2001 From: Alexander Date: Fri, 10 Nov 2023 02:44:01 +0000 Subject: [PATCH 53/77] test ci --- .../Training/GradientDescentOptimizerTests.cs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs b/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs index 92fe97706..98738528d 100644 --- a/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs +++ b/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs @@ -27,8 +27,8 @@ private void TestBasic() where T : struct var dtype = GetTypeForNumericType(); // train.GradientDescentOptimizer is V1 only API. 
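
What TestTensorLearningRate adds over TestBasic: the learning rate is fed to the optimizer as a scalar graph tensor rather than a host-side float, which is what allows a rate computed inside the graph (for example by a schedule) to drive the update directly. The relevant lines in isolation, with grads_and_vars as defined in the test:

```csharp
// Sketch: learning rate supplied as a scalar tensor instead of a float.
var lrate = constant_op.constant(3.0);
var sgd_op = tf.train.GradientDescentOptimizer(lrate)
    .apply_gradients(grads_and_vars);
```
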
- tf.Graph().as_default(); - using (var sess = self.cached_session()) + //tf.Graph().as_default(); + /*using (var sess = self.cached_session()) { var var0 = tf.Variable(new[] { 1.0, 2.0 }, dtype: dtype); var var1 = tf.Variable(new[] { 3.0, 4.0 }, dtype: dtype); @@ -59,7 +59,7 @@ private void TestBasic() where T : struct new[] { 3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01 }, self.evaluate(var1)); // TODO: self.assertEqual(0, len(optimizer.variables())); - } + }*/ } [TestMethod] @@ -67,7 +67,7 @@ public void TestBasic() { //TODO: add np.half TestBasic(); - TestBasic(); + // TestBasic(); } private void TestTensorLearningRate() where T : struct @@ -115,8 +115,8 @@ private void TestTensorLearningRate() where T : struct public void TestTensorLearningRate() { //TODO: add np.half - TestTensorLearningRate(); - TestTensorLearningRate(); + // TestTensorLearningRate(); + // TestTensorLearningRate(); } } } From 2cb5fd66f842832a2254155f296a54764473f5cd Mon Sep 17 00:00:00 2001 From: Alexander Date: Fri, 10 Nov 2023 13:53:40 +0000 Subject: [PATCH 54/77] new graph --- .../Training/BasicLinearModel.cs | 2 ++ .../Training/GradientDescentOptimizerTests.cs | 17 +++++++---------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/test/TensorFlowNET.UnitTest/Training/BasicLinearModel.cs b/test/TensorFlowNET.UnitTest/Training/BasicLinearModel.cs index 1283ecaf2..a37f28920 100644 --- a/test/TensorFlowNET.UnitTest/Training/BasicLinearModel.cs +++ b/test/TensorFlowNET.UnitTest/Training/BasicLinearModel.cs @@ -15,6 +15,8 @@ public class BasicLinearModel [TestMethod] public void LinearRegression() { + tf.Graph().as_default(); + // Initialize the weights to `5.0` and the bias to `0.0` // In practice, these should be initialized to random values (for example, with `tf.random.normal`) var W = tf.Variable(5.0f); diff --git a/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs b/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs index 98738528d..1632f1e73 100644 --- a/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs +++ b/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs @@ -1,8 +1,5 @@ -using Microsoft.VisualStudio.TestPlatform.Utilities; -using Microsoft.VisualStudio.TestTools.UnitTesting; +using Microsoft.VisualStudio.TestTools.UnitTesting; using System; -using System.Diagnostics; -using System.Linq; using Tensorflow.NumPy; using TensorFlowNET.UnitTest; using static Tensorflow.Binding; @@ -27,8 +24,8 @@ private void TestBasic() where T : struct var dtype = GetTypeForNumericType(); // train.GradientDescentOptimizer is V1 only API. 
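
Patches 53 and 54 are two halves of one fix: the tests were first commented out because PythonTest's cached session had been created against a different graph, so the consistency check in _get_cached_session threw; giving every test its own default graph, combined with the null guards added in patch 50, lets them run again. The guard in question, roughly (field names as in PythonTest):

```csharp
// Sketch: the null-guarded consistency check from patch 50.
if (crash_if_inconsistent_args && _cached_graph != null && !_cached_graph.Equals(graph))
    throw new ValueError("The graph used to get the cached session is different " +
        "than the one that was used to create the session. " +
        "Maybe create a new session with self.session()");
```
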
- //tf.Graph().as_default(); - /*using (var sess = self.cached_session()) + tf.Graph().as_default(); + using (var sess = self.cached_session()) { var var0 = tf.Variable(new[] { 1.0, 2.0 }, dtype: dtype); var var1 = tf.Variable(new[] { 3.0, 4.0 }, dtype: dtype); @@ -59,7 +56,7 @@ private void TestBasic() where T : struct new[] { 3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01 }, self.evaluate(var1)); // TODO: self.assertEqual(0, len(optimizer.variables())); - }*/ + } } [TestMethod] @@ -67,7 +64,7 @@ public void TestBasic() { //TODO: add np.half TestBasic(); - // TestBasic(); + TestBasic(); } private void TestTensorLearningRate() where T : struct @@ -115,8 +112,8 @@ private void TestTensorLearningRate() where T : struct public void TestTensorLearningRate() { //TODO: add np.half - // TestTensorLearningRate(); - // TestTensorLearningRate(); + TestTensorLearningRate(); + TestTensorLearningRate(); } } } From 09d466d697e58d97598bbee248ffd7ceb8a7be92 Mon Sep 17 00:00:00 2001 From: Alexander Date: Fri, 10 Nov 2023 14:00:51 +0000 Subject: [PATCH 55/77] ci test --- test/TensorFlowNET.UnitTest/Training/BasicLinearModel.cs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/TensorFlowNET.UnitTest/Training/BasicLinearModel.cs b/test/TensorFlowNET.UnitTest/Training/BasicLinearModel.cs index a37f28920..d0da1d5b9 100644 --- a/test/TensorFlowNET.UnitTest/Training/BasicLinearModel.cs +++ b/test/TensorFlowNET.UnitTest/Training/BasicLinearModel.cs @@ -15,7 +15,9 @@ public class BasicLinearModel [TestMethod] public void LinearRegression() { - tf.Graph().as_default(); + var graph = tf.Graph().as_default(); + var sess = new Session(graph); + sess.as_default(); // Initialize the weights to `5.0` and the bias to `0.0` // In practice, these should be initialized to random values (for example, with `tf.random.normal`) From c5b4928bd6eaa9fcff9d0e71932cd7c1587d1eb6 Mon Sep 17 00:00:00 2001 From: Alexander Date: Fri, 10 Nov 2023 14:28:41 +0000 Subject: [PATCH 56/77] correct namespace passing --- test/TensorFlowNET.UnitTest/Training/BasicLinearModel.cs | 4 ---- .../Training/GradientDescentOptimizerTests.cs | 4 ++-- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/test/TensorFlowNET.UnitTest/Training/BasicLinearModel.cs b/test/TensorFlowNET.UnitTest/Training/BasicLinearModel.cs index d0da1d5b9..1283ecaf2 100644 --- a/test/TensorFlowNET.UnitTest/Training/BasicLinearModel.cs +++ b/test/TensorFlowNET.UnitTest/Training/BasicLinearModel.cs @@ -15,10 +15,6 @@ public class BasicLinearModel [TestMethod] public void LinearRegression() { - var graph = tf.Graph().as_default(); - var sess = new Session(graph); - sess.as_default(); - // Initialize the weights to `5.0` and the bias to `0.0` // In practice, these should be initialized to random values (for example, with `tf.random.normal`) var W = tf.Variable(5.0f); diff --git a/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs b/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs index 1632f1e73..d766890b2 100644 --- a/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs +++ b/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs @@ -1,10 +1,10 @@ using Microsoft.VisualStudio.TestTools.UnitTesting; using System; +using Tensorflow; using Tensorflow.NumPy; -using TensorFlowNET.UnitTest; using static Tensorflow.Binding; -namespace Tensorflow.Keras.UnitTest.Optimizers +namespace TensorFlowNET.UnitTest.Training { [TestClass] public class GradientDescentOptimizerTest : PythonTest From 
fc8f493187bd382bc994c4f79c17b369611cca36 Mon Sep 17 00:00:00 2001 From: Alexander Date: Fri, 10 Nov 2023 20:47:49 +0000 Subject: [PATCH 57/77] common assembly for python test --- TensorFlow.NET.sln | 23 +- .../PythonTest.cs | 448 ------------------ .../TensorFlowNET.Graph.UnitTest.csproj | 1 + .../Tensorflow.Binding.UnitTest.csproj | 1 + .../PythonTest.cs | 3 - .../Tensorflow.UnitTest.csproj | 24 + 6 files changed, 48 insertions(+), 452 deletions(-) delete mode 100644 test/TensorFlowNET.Graph.UnitTest/PythonTest.cs rename test/{TensorFlowNET.UnitTest => Tensorflow.UnitTest}/PythonTest.cs (99%) create mode 100644 test/Tensorflow.UnitTest/Tensorflow.UnitTest.csproj diff --git a/TensorFlow.NET.sln b/TensorFlow.NET.sln index 214b039d4..e0c273568 100644 --- a/TensorFlow.NET.sln +++ b/TensorFlow.NET.sln @@ -39,7 +39,9 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Tensorflow.Benchmark", "too EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Tensorflow.Console", "tools\TensorFlowNET.Console\Tensorflow.Console.csproj", "{1DC32255-BA1F-4D6D-A9C9-5BD5ED71CAA0}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "TensorFlow.Kernel.UnitTest", "test\TensorFlow.Kernel.UnitTest\TensorFlow.Kernel.UnitTest.csproj", "{654A027D-1364-4729-880B-144DFE1FF5BB}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TensorFlow.Kernel.UnitTest", "test\TensorFlow.Kernel.UnitTest\TensorFlow.Kernel.UnitTest.csproj", "{654A027D-1364-4729-880B-144DFE1FF5BB}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Tensorflow.UnitTest", "test\Tensorflow.UnitTest\Tensorflow.UnitTest.csproj", "{A73DF5A6-866E-4AED-9017-AA2EE86368C4}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -342,6 +344,24 @@ Global {654A027D-1364-4729-880B-144DFE1FF5BB}.Release|x64.Build.0 = Release|Any CPU {654A027D-1364-4729-880B-144DFE1FF5BB}.Release|x86.ActiveCfg = Release|Any CPU {654A027D-1364-4729-880B-144DFE1FF5BB}.Release|x86.Build.0 = Release|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.Debug|Any CPU.Build.0 = Debug|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.Debug|x64.ActiveCfg = Debug|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.Debug|x64.Build.0 = Debug|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.Debug|x86.ActiveCfg = Debug|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.Debug|x86.Build.0 = Debug|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.GPU|Any CPU.ActiveCfg = Debug|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.GPU|Any CPU.Build.0 = Debug|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.GPU|x64.ActiveCfg = Debug|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.GPU|x64.Build.0 = Debug|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.GPU|x86.ActiveCfg = Debug|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.GPU|x86.Build.0 = Debug|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.Release|Any CPU.ActiveCfg = Release|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.Release|Any CPU.Build.0 = Release|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.Release|x64.ActiveCfg = Release|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.Release|x64.Build.0 = Release|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.Release|x86.ActiveCfg = Release|Any CPU + {A73DF5A6-866E-4AED-9017-AA2EE86368C4}.Release|x86.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE @@ -363,6 +383,7 @@ Global 
{C23563DB-FE21-48E7-A411-87A109E4A899} = {E1A5D2B7-10AF-4876-85C0-7714EF274214} {1DC32255-BA1F-4D6D-A9C9-5BD5ED71CAA0} = {E1A5D2B7-10AF-4876-85C0-7714EF274214} {654A027D-1364-4729-880B-144DFE1FF5BB} = {1B0918B9-65AD-4F34-A287-AF4597B27DBD} + {A73DF5A6-866E-4AED-9017-AA2EE86368C4} = {1B0918B9-65AD-4F34-A287-AF4597B27DBD} EndGlobalSection GlobalSection(ExtensibilityGlobals) = postSolution SolutionGuid = {2DEAD3CC-486B-4918-A607-50B0DE7B114A} diff --git a/test/TensorFlowNET.Graph.UnitTest/PythonTest.cs b/test/TensorFlowNET.Graph.UnitTest/PythonTest.cs deleted file mode 100644 index ccf59f5ae..000000000 --- a/test/TensorFlowNET.Graph.UnitTest/PythonTest.cs +++ /dev/null @@ -1,448 +0,0 @@ -using Microsoft.VisualStudio.TestTools.UnitTesting; -using Newtonsoft.Json.Linq; -using Tensorflow.NumPy; -using System; -using System.Collections; -using System.Linq; -using Tensorflow; -using static Tensorflow.Binding; -using OneOf.Types; -using System.Collections.Generic; - -namespace TensorFlowNET.UnitTest -{ - /// - /// Use as base class for test classes to get additional assertions - /// - public class PythonTest - { - #region python compatibility layer - protected PythonTest self { get => this; } - protected int None => -1; - #endregion - - #region pytest assertions - - public void assertItemsEqual(ICollection given, ICollection expected) - { - if (given is Hashtable && expected is Hashtable) - { - Assert.AreEqual(JObject.FromObject(expected).ToString(), JObject.FromObject(given).ToString()); - return; - } - Assert.IsNotNull(expected); - Assert.IsNotNull(given); - var e = expected.OfType().ToArray(); - var g = given.OfType().ToArray(); - Assert.AreEqual(e.Length, g.Length, $"The collections differ in length expected {e.Length} but got {g.Length}"); - for (int i = 0; i < e.Length; i++) - { - /*if (g[i] is NDArray && e[i] is NDArray) - assertItemsEqual((g[i] as NDArray).GetData(), (e[i] as NDArray).GetData()); - else*/ - if (e[i] is ICollection && g[i] is ICollection) - assertEqual(g[i], e[i]); - else - Assert.AreEqual(e[i], g[i], $"Items differ at index {i}, expected {e[i]} but got {g[i]}"); - } - } - - public void assertAllEqual(ICollection given, ICollection expected) - { - assertItemsEqual(given, expected); - } - - public void assertFloat32Equal(float expected, float actual, string msg) - { - float eps = 1e-6f; - Assert.IsTrue(Math.Abs(expected - actual) < eps * Math.Max(1.0f, Math.Abs(expected)), $"{msg}: expected {expected} vs actual {actual}"); - } - - public void assertFloat64Equal(double expected, double actual, string msg) - { - double eps = 1e-16f; - Assert.IsTrue(Math.Abs(expected - actual) < eps * Math.Max(1.0f, Math.Abs(expected)), $"{msg}: expected {expected} vs actual {actual}"); - } - - public void assertEqual(object given, object expected) - { - /*if (given is NDArray && expected is NDArray) - { - assertItemsEqual((given as NDArray).GetData(), (expected as NDArray).GetData()); - return; - }*/ - if (given is Hashtable && expected is Hashtable) - { - Assert.AreEqual(JObject.FromObject(expected).ToString(), JObject.FromObject(given).ToString()); - return; - } - if (given is ICollection && expected is ICollection) - { - assertItemsEqual(given as ICollection, expected as ICollection); - return; - } - if (given is float && expected is float) - { - assertFloat32Equal((float)expected, (float)given, ""); - return; - } - if (given is double && expected is double) - { - assertFloat64Equal((double)expected, (double)given, ""); - return; - } - Assert.AreEqual(expected, given); - } - - public void 
assertEquals(object given, object expected) - { - assertEqual(given, expected); - } - - public void assert(object given) - { - if (given is bool) - Assert.IsTrue((bool)given); - Assert.IsNotNull(given); - } - - public void assertIsNotNone(object given) - { - Assert.IsNotNull(given); - } - - public void assertFalse(bool cond) - { - Assert.IsFalse(cond); - } - - public void assertTrue(bool cond) - { - Assert.IsTrue(cond); - } - - public void assertAllClose(NDArray array1, NDArray array2, double eps = 1e-5) - { - Assert.IsTrue(np.allclose(array1, array2, rtol: eps)); - } - - public void assertAllClose(double value, NDArray array2, double eps = 1e-5) - { - var array1 = np.ones_like(array2) * value; - // Assert.IsTrue(np.allclose(array1, array2, rtol: eps)); - } - - public void assertProtoEquals(object toProto, object o) - { - throw new NotImplementedException(); - } - - #endregion - - #region tensor evaluation and test session - - private Session _cached_session = null; - private Graph _cached_graph = null; - private object _cached_config = null; - private bool _cached_force_gpu = false; - - private void _ClearCachedSession() - { - if (self._cached_session != null) - { - self._cached_session.Dispose(); - self._cached_session = null; - } - } - - - //protected object _eval_helper(Tensor[] tensors) - //{ - // if (tensors == null) - // return null; - // return nest.map_structure(self._eval_tensor, tensors); - //} - - protected object _eval_tensor(object tensor) - { - if (tensor == null) - return None; - //else if (callable(tensor)) - // return self._eval_helper(tensor()) - else - { - try - { - //TODO: - // if sparse_tensor.is_sparse(tensor): - // return sparse_tensor.SparseTensorValue(tensor.indices, tensor.values, - // tensor.dense_shape) - //return (tensor as Tensor).numpy(); - } - catch (Exception) - { - throw new ValueError("Unsupported type: " + tensor.GetType()); - } - return null; - } - } - - /// - /// This function is used in many original tensorflow unit tests to evaluate tensors - /// in a test session with special settings (for instance constant folding off) - /// - /// - public T evaluate(Tensor tensor) - { - object result = null; - // if context.executing_eagerly(): - // return self._eval_helper(tensors) - // else: - { - var sess = tf.Session(); - var ndarray = tensor.eval(sess); - if (typeof(T) == typeof(double)) - { - double x = ndarray; - result = x; - } - else if (typeof(T) == typeof(int)) - { - int x = ndarray; - result = x; - } - else - { - result = ndarray; - } - - return (T)result; - } - } - - ///Returns a TensorFlow Session for use in executing tests. - public Session cached_session( - Graph graph = null, object config = null, bool use_gpu = false, bool force_gpu = false) - { - // This method behaves differently than self.session(): for performance reasons - // `cached_session` will by default reuse the same session within the same - // test.The session returned by this function will only be closed at the end - // of the test(in the TearDown function). - - // Use the `use_gpu` and `force_gpu` options to control where ops are run.If - // `force_gpu` is True, all ops are pinned to `/ device:GPU:0`. Otherwise, if - // `use_gpu` is True, TensorFlow tries to run as many ops on the GPU as - // possible.If both `force_gpu and `use_gpu` are False, all ops are pinned to - // the CPU. 
- - // Example: - // python - // class MyOperatorTest(test_util.TensorFlowTestCase) : - // def testMyOperator(self): - // with self.cached_session() as sess: - // valid_input = [1.0, 2.0, 3.0, 4.0, 5.0] - // result = MyOperator(valid_input).eval() - // self.assertEqual(result, [1.0, 2.0, 3.0, 5.0, 8.0] - // invalid_input = [-1.0, 2.0, 7.0] - // with self.assertRaisesOpError("negative input not supported"): - // MyOperator(invalid_input).eval() - - - // Args: - // graph: Optional graph to use during the returned session. - // config: An optional config_pb2.ConfigProto to use to configure the - // session. - // use_gpu: If True, attempt to run as many ops as possible on GPU. - // force_gpu: If True, pin all ops to `/device:GPU:0`. - - // Yields: - // A Session object that should be used as a context manager to surround - // the graph building and execution code in a test case. - - - // TODO: - // if context.executing_eagerly(): - // return self._eval_helper(tensors) - // else: - { - var sess = self._get_cached_session( - graph, config, force_gpu, crash_if_inconsistent_args: true); - using var cached = self._constrain_devices_and_set_default(sess, use_gpu, force_gpu); - return cached; - } - } - - //Returns a TensorFlow Session for use in executing tests. - public Session session(Graph graph = null, object config = null, bool use_gpu = false, bool force_gpu = false) - { - //Note that this will set this session and the graph as global defaults. - - //Use the `use_gpu` and `force_gpu` options to control where ops are run.If - //`force_gpu` is True, all ops are pinned to `/device:GPU:0`. Otherwise, if - //`use_gpu` is True, TensorFlow tries to run as many ops on the GPU as - //possible.If both `force_gpu and `use_gpu` are False, all ops are pinned to - //the CPU. - - //Example: - //```python - //class MyOperatorTest(test_util.TensorFlowTestCase): - // def testMyOperator(self): - // with self.session(use_gpu= True): - // valid_input = [1.0, 2.0, 3.0, 4.0, 5.0] - // result = MyOperator(valid_input).eval() - // self.assertEqual(result, [1.0, 2.0, 3.0, 5.0, 8.0] - // invalid_input = [-1.0, 2.0, 7.0] - // with self.assertRaisesOpError("negative input not supported"): - // MyOperator(invalid_input).eval() - //``` - - //Args: - // graph: Optional graph to use during the returned session. - // config: An optional config_pb2.ConfigProto to use to configure the - // session. - // use_gpu: If True, attempt to run as many ops as possible on GPU. - // force_gpu: If True, pin all ops to `/device:GPU:0`. - - //Yields: - // A Session object that should be used as a context manager to surround - // the graph building and execution code in a test case. 
- - Session s = null; - //if (context.executing_eagerly()) - // yield None - //else - //{ - s = self._create_session(graph, config, force_gpu); - //} - return s.as_default(); - } - - private Session _constrain_devices_and_set_default(Session sess, bool use_gpu, bool force_gpu) - { - // Set the session and its graph to global default and constrain devices.""" - if (tf.executing_eagerly()) - return null; - else { - sess.graph.as_default(); - sess.as_default(); - { - if (force_gpu) - { - // TODO: - - // Use the name of an actual device if one is detected, or - // '/device:GPU:0' otherwise - /* var gpu_name = gpu_device_name(); - if (!gpu_name) - gpu_name = "/device:GPU:0" - using (sess.graph.device(gpu_name)) { - yield return sess; - }*/ - return sess; - } - else if (use_gpu) - return sess; - else - using (sess.graph.device("/device:CPU:0")) - return sess; - } - - } - } - - // See session() for details. - private Session _create_session(Graph graph, object cfg, bool forceGpu) - { - var prepare_config = new Func((config) => - { - // """Returns a config for sessions. - // Args: - // config: An optional config_pb2.ConfigProto to use to configure the - // session. - // Returns: - // A config_pb2.ConfigProto object. - - //TODO: config - - // # use_gpu=False. Currently many tests rely on the fact that any device - // # will be used even when a specific device is supposed to be used. - // allow_soft_placement = not force_gpu - // if config is None: - // config = config_pb2.ConfigProto() - // config.allow_soft_placement = allow_soft_placement - // config.gpu_options.per_process_gpu_memory_fraction = 0.3 - // elif not allow_soft_placement and config.allow_soft_placement: - // config_copy = config_pb2.ConfigProto() - // config_copy.CopyFrom(config) - // config = config_copy - // config.allow_soft_placement = False - // # Don't perform optimizations for tests so we don't inadvertently run - // # gpu ops on cpu - // config.graph_options.optimizer_options.opt_level = -1 - // # Disable Grappler constant folding since some tests & benchmarks - // # use constant input and become meaningless after constant folding. - // # DO NOT DISABLE GRAPPLER OPTIMIZERS WITHOUT CONSULTING WITH THE - // # GRAPPLER TEAM. - // config.graph_options.rewrite_options.constant_folding = ( - // rewriter_config_pb2.RewriterConfig.OFF) - // config.graph_options.rewrite_options.pin_to_host_optimization = ( - // rewriter_config_pb2.RewriterConfig.OFF) - return config; - }); - //TODO: use this instead of normal session - //return new ErrorLoggingSession(graph = graph, config = prepare_config(config)) - return new Session(graph);//, config = prepare_config(config)) - } - - private Session _get_cached_session( - Graph graph = null, - object config = null, - bool force_gpu = false, - bool crash_if_inconsistent_args = true) - { - // See cached_session() for documentation. - if (self._cached_session == null) - { - var sess = self._create_session(graph, config, force_gpu); - self._cached_session = sess; - self._cached_graph = graph; - self._cached_config = config; - self._cached_force_gpu = force_gpu; - return sess; - } else { - - if (crash_if_inconsistent_args && !self._cached_graph.Equals(graph)) - throw new ValueError(@"The graph used to get the cached session is - different than the one that was used to create the - session. 
Maybe create a new session with - self.session()"); - if (crash_if_inconsistent_args && !self._cached_config.Equals(config)) { - throw new ValueError(@"The config used to get the cached session is - different than the one that was used to create the - session. Maybe create a new session with - self.session()"); - } - if (crash_if_inconsistent_args && !self._cached_force_gpu.Equals(force_gpu)) { - throw new ValueError(@"The force_gpu value used to get the cached session is - different than the one that was used to create the - session. Maybe create a new session with - self.session()"); - } - return _cached_session; - } - } - - [TestCleanup] - public void Cleanup() - { - _ClearCachedSession(); - } - - #endregion - - public void AssetSequenceEqual(T[] a, T[] b) - { - Assert.IsTrue(Enumerable.SequenceEqual(a, b)); - } - } -} diff --git a/test/TensorFlowNET.Graph.UnitTest/TensorFlowNET.Graph.UnitTest.csproj b/test/TensorFlowNET.Graph.UnitTest/TensorFlowNET.Graph.UnitTest.csproj index 78a0938c5..74663c1cb 100644 --- a/test/TensorFlowNET.Graph.UnitTest/TensorFlowNET.Graph.UnitTest.csproj +++ b/test/TensorFlowNET.Graph.UnitTest/TensorFlowNET.Graph.UnitTest.csproj @@ -36,6 +36,7 @@ + diff --git a/test/TensorFlowNET.UnitTest/Tensorflow.Binding.UnitTest.csproj b/test/TensorFlowNET.UnitTest/Tensorflow.Binding.UnitTest.csproj index 7a6a7f92c..5264cb104 100644 --- a/test/TensorFlowNET.UnitTest/Tensorflow.Binding.UnitTest.csproj +++ b/test/TensorFlowNET.UnitTest/Tensorflow.Binding.UnitTest.csproj @@ -51,6 +51,7 @@ + diff --git a/test/TensorFlowNET.UnitTest/PythonTest.cs b/test/Tensorflow.UnitTest/PythonTest.cs similarity index 99% rename from test/TensorFlowNET.UnitTest/PythonTest.cs rename to test/Tensorflow.UnitTest/PythonTest.cs index 090ef097c..b2412ea9f 100644 --- a/test/TensorFlowNET.UnitTest/PythonTest.cs +++ b/test/Tensorflow.UnitTest/PythonTest.cs @@ -1,12 +1,9 @@ using Microsoft.VisualStudio.TestTools.UnitTesting; using Newtonsoft.Json.Linq; using Tensorflow.NumPy; -using System; using System.Collections; -using System.Linq; using Tensorflow; using static Tensorflow.Binding; -using System.Collections.Generic; namespace TensorFlowNET.UnitTest { diff --git a/test/Tensorflow.UnitTest/Tensorflow.UnitTest.csproj b/test/Tensorflow.UnitTest/Tensorflow.UnitTest.csproj new file mode 100644 index 000000000..66a7d63bd --- /dev/null +++ b/test/Tensorflow.UnitTest/Tensorflow.UnitTest.csproj @@ -0,0 +1,24 @@ + + + + net6.0 + enable + enable + + false + true + + + + + + + + + + + + + + + From 165e9169e49841bb2d326ff903949244565a1a00 Mon Sep 17 00:00:00 2001 From: Alexander Date: Fri, 10 Nov 2023 21:01:12 +0000 Subject: [PATCH 58/77] assert all close --- .../GradientTest/GradientTest.cs | 22 +------------------ test/Tensorflow.UnitTest/PythonTest.cs | 18 +++++++-------- 2 files changed, 10 insertions(+), 30 deletions(-) diff --git a/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs b/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs index e2d6db912..cea6de172 100644 --- a/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs +++ b/test/TensorFlowNET.Graph.UnitTest/GradientTest/GradientTest.cs @@ -625,25 +625,6 @@ public void testPartialDerivatives() } } - // TODO: remove when np.testing.assert_allclose(a, b) is implemented - private class CollectionComparer : System.Collections.IComparer - { - private readonly double _epsilon = 1e-07; - - public int Compare(object x, object y) - { - var a = (double)x; - var b = (double)y; - - double delta = Math.Abs(a - b); - if (delta < 
_epsilon) - { - return 0; - } - return a.CompareTo(b); - } - } - private struct Case { public Tensor[] grad1; @@ -748,8 +729,7 @@ Tensor[] gradients(Tensor[] ys, Tensor[] xs, Tensor[] stop_gradients = null) var npgrad2 = result[1]; foreach (var (a, b) in npgrad1.Zip(npgrad2)) { - // TODO: np.testing.assert_allclose(a, b); - CollectionAssert.AreEqual(a.ToArray(), b.ToArray(), new CollectionComparer()); + self.assertAllClose(a, b); } } } diff --git a/test/Tensorflow.UnitTest/PythonTest.cs b/test/Tensorflow.UnitTest/PythonTest.cs index b2412ea9f..650f70f2c 100644 --- a/test/Tensorflow.UnitTest/PythonTest.cs +++ b/test/Tensorflow.UnitTest/PythonTest.cs @@ -185,9 +185,9 @@ public void assertProtoEquals(object toProto, object o) #region tensor evaluation and test session - private Session _cached_session = null; - private Graph _cached_graph = null; - private object _cached_config = null; + private Session? _cached_session = null; + private Graph? _cached_graph = null; + private object? _cached_config = null; private bool _cached_force_gpu = false; private void _ClearCachedSession() @@ -237,7 +237,7 @@ protected object _eval_tensor(object tensor) /// public T evaluate(Tensor tensor) { - object result = null; + object? result = null; // if context.executing_eagerly(): // return self._eval_helper(tensors) // else: @@ -274,7 +274,7 @@ public T evaluate(Tensor tensor) ///Returns a TensorFlow Session for use in executing tests. public Session cached_session( - Graph graph = null, object config = null, bool use_gpu = false, bool force_gpu = false) + Graph? graph = null, object? config = null, bool use_gpu = false, bool force_gpu = false) { // This method behaves differently than self.session(): for performance reasons // `cached_session` will by default reuse the same session within the same @@ -325,7 +325,7 @@ public Session cached_session( } //Returns a TensorFlow Session for use in executing tests. - public Session session(Graph graph = null, object config = null, bool use_gpu = false, bool force_gpu = false) + public Session session(Graph? graph = null, object? config = null, bool use_gpu = false, bool force_gpu = false) { //Note that this will set this session and the graph as global defaults. @@ -359,7 +359,7 @@ public Session session(Graph graph = null, object config = null, bool use_gpu = // A Session object that should be used as a context manager to surround // the graph building and execution code in a test case. - Session s = null; + Session? s = null; //if (context.executing_eagerly()) // yield None //else @@ -448,8 +448,8 @@ private Session _create_session(Graph graph, object cfg, bool forceGpu) } private Session _get_cached_session( - Graph graph = null, - object config = null, + Graph? graph = null, + object? 
config = null, bool force_gpu = false, bool crash_if_inconsistent_args = true) { From b906c9a69a15ad413f519db741335bdb1aedf07a Mon Sep 17 00:00:00 2001 From: Alexander Date: Fri, 10 Nov 2023 21:16:42 +0000 Subject: [PATCH 59/77] fix nullability --- .../Tensorflow.Keras.UnitTest.csproj | 1 + test/Tensorflow.UnitTest/PythonTest.cs | 29 ++++++++++++++----- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/test/TensorFlowNET.Keras.UnitTest/Tensorflow.Keras.UnitTest.csproj b/test/TensorFlowNET.Keras.UnitTest/Tensorflow.Keras.UnitTest.csproj index 3910eba1c..e8b8d42b3 100644 --- a/test/TensorFlowNET.Keras.UnitTest/Tensorflow.Keras.UnitTest.csproj +++ b/test/TensorFlowNET.Keras.UnitTest/Tensorflow.Keras.UnitTest.csproj @@ -25,6 +25,7 @@ + diff --git a/test/Tensorflow.UnitTest/PythonTest.cs b/test/Tensorflow.UnitTest/PythonTest.cs index 650f70f2c..5d1b1e0e1 100644 --- a/test/Tensorflow.UnitTest/PythonTest.cs +++ b/test/Tensorflow.UnitTest/PythonTest.cs @@ -86,9 +86,9 @@ public void assertEqual(object given, object expected) Assert.AreEqual(JObject.FromObject(expected).ToString(), JObject.FromObject(given).ToString()); return; } - if (given is ICollection && expected is ICollection) + if (given is ICollection collectionGiven && expected is ICollection collectionExpected) { - assertItemsEqual(given as ICollection, expected as ICollection); + assertItemsEqual(collectionGiven, collectionExpected); return; } if (given is float && expected is float) @@ -150,8 +150,21 @@ public CollectionComparer(double eps = 1e-06) { _epsilon = eps; } - public int Compare(object x, object y) + public int Compare(object? x, object? y) { + if (x == null && y == null) + { + return 0; + } + else if (x == null) + { + return -1; + } + else if (y == null) + { + return 1; + } + var a = (double)x; var b = (double)y; @@ -206,7 +219,7 @@ private void _ClearCachedSession() // return nest.map_structure(self._eval_tensor, tensors); //} - protected object _eval_tensor(object tensor) + protected object? _eval_tensor(object tensor) { if (tensor == null) return None; @@ -273,7 +286,7 @@ public T evaluate(Tensor tensor) ///Returns a TensorFlow Session for use in executing tests. - public Session cached_session( + public Session? cached_session( Graph? graph = null, object? config = null, bool use_gpu = false, bool force_gpu = false) { // This method behaves differently than self.session(): for performance reasons @@ -369,7 +382,7 @@ public Session session(Graph? graph = null, object? config = null, bool use_gpu return s.as_default(); } - private Session _constrain_devices_and_set_default(Session sess, bool use_gpu, bool force_gpu) + private Session? _constrain_devices_and_set_default(Session sess, bool use_gpu, bool force_gpu) { // Set the session and its graph to global default and constrain devices.""" if (tf.executing_eagerly()) @@ -404,7 +417,7 @@ private Session _constrain_devices_and_set_default(Session sess, bool use_gpu, b } // See session() for details. - private Session _create_session(Graph graph, object cfg, bool forceGpu) + private Session _create_session(Graph? graph, object? cfg, bool forceGpu) { var prepare_config = new Func((config) => { @@ -485,7 +498,7 @@ different than the one that was used to create the session. 
Maybe create a new session with self.session()"); } - return _cached_session; + return self._cached_session; } } From b6db9410b3c66ad30ac900330708060231e39809 Mon Sep 17 00:00:00 2001 From: Alexander Date: Fri, 10 Nov 2023 21:20:13 +0000 Subject: [PATCH 60/77] update packages --- .../TensorFlow.Kernel.UnitTest.csproj | 2 +- .../TensorFlowNET.Graph.UnitTest.csproj | 2 +- .../Tensorflow.Keras.UnitTest.csproj | 2 +- .../Tensorflow.Native.UnitTest.csproj | 2 +- test/Tensorflow.UnitTest/Tensorflow.UnitTest.csproj | 4 ++-- .../TensorflowNET.Hub.Unittest/Tensorflow.Hub.Unittest.csproj | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/test/TensorFlow.Kernel.UnitTest/TensorFlow.Kernel.UnitTest.csproj b/test/TensorFlow.Kernel.UnitTest/TensorFlow.Kernel.UnitTest.csproj index 21b2731b7..461993408 100644 --- a/test/TensorFlow.Kernel.UnitTest/TensorFlow.Kernel.UnitTest.csproj +++ b/test/TensorFlow.Kernel.UnitTest/TensorFlow.Kernel.UnitTest.csproj @@ -10,7 +10,7 @@ - + diff --git a/test/TensorFlowNET.Graph.UnitTest/TensorFlowNET.Graph.UnitTest.csproj b/test/TensorFlowNET.Graph.UnitTest/TensorFlowNET.Graph.UnitTest.csproj index 74663c1cb..40dd53f74 100644 --- a/test/TensorFlowNET.Graph.UnitTest/TensorFlowNET.Graph.UnitTest.csproj +++ b/test/TensorFlowNET.Graph.UnitTest/TensorFlowNET.Graph.UnitTest.csproj @@ -24,7 +24,7 @@ - + diff --git a/test/TensorFlowNET.Keras.UnitTest/Tensorflow.Keras.UnitTest.csproj b/test/TensorFlowNET.Keras.UnitTest/Tensorflow.Keras.UnitTest.csproj index e8b8d42b3..edac1c2ff 100644 --- a/test/TensorFlowNET.Keras.UnitTest/Tensorflow.Keras.UnitTest.csproj +++ b/test/TensorFlowNET.Keras.UnitTest/Tensorflow.Keras.UnitTest.csproj @@ -13,7 +13,7 @@ - + diff --git a/test/TensorFlowNET.Native.UnitTest/Tensorflow.Native.UnitTest.csproj b/test/TensorFlowNET.Native.UnitTest/Tensorflow.Native.UnitTest.csproj index a4f1ec567..c054a8707 100644 --- a/test/TensorFlowNET.Native.UnitTest/Tensorflow.Native.UnitTest.csproj +++ b/test/TensorFlowNET.Native.UnitTest/Tensorflow.Native.UnitTest.csproj @@ -44,7 +44,7 @@ - + diff --git a/test/Tensorflow.UnitTest/Tensorflow.UnitTest.csproj b/test/Tensorflow.UnitTest/Tensorflow.UnitTest.csproj index 66a7d63bd..9ad6bc7a5 100644 --- a/test/Tensorflow.UnitTest/Tensorflow.UnitTest.csproj +++ b/test/Tensorflow.UnitTest/Tensorflow.UnitTest.csproj @@ -1,4 +1,4 @@ - + net6.0 @@ -10,7 +10,7 @@ - + diff --git a/test/TensorflowNET.Hub.Unittest/Tensorflow.Hub.Unittest.csproj b/test/TensorflowNET.Hub.Unittest/Tensorflow.Hub.Unittest.csproj index 4c3918e4a..c93b89256 100644 --- a/test/TensorflowNET.Hub.Unittest/Tensorflow.Hub.Unittest.csproj +++ b/test/TensorflowNET.Hub.Unittest/Tensorflow.Hub.Unittest.csproj @@ -9,7 +9,7 @@ - + From 7968dc360fbcbb57265e8a49192c8b028e9d0196 Mon Sep 17 00:00:00 2001 From: Alexander Date: Sat, 11 Nov 2023 05:54:38 +0000 Subject: [PATCH 61/77] fix test --- test/Tensorflow.UnitTest/PythonTest.cs | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/test/Tensorflow.UnitTest/PythonTest.cs b/test/Tensorflow.UnitTest/PythonTest.cs index 5d1b1e0e1..dff652933 100644 --- a/test/Tensorflow.UnitTest/PythonTest.cs +++ b/test/Tensorflow.UnitTest/PythonTest.cs @@ -133,13 +133,23 @@ public void assertTrue(bool cond) public void assertAllClose(NDArray array1, NDArray array2, double eps = 1e-5) { - Assert.IsTrue(np.allclose(array1, array2, rtol: eps)); + CollectionAssert.AreEqual(array1.ToArray(), array2.ToArray(), new CollectionComparer(eps)); + + //TODO: Assert.IsTrue(np.allclose(array1, array2, rtol: eps)); 
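+            // Note: CollectionComparer checks an absolute per-element tolerance (|a - b| < eps), while np.allclose applies a relative tolerance (rtol), so the two assertions are not strictly equivalent.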
} public void assertAllClose(double value, NDArray array2, double eps = 1e-5) { + if (array2.shape.IsScalar) + { + double value2 = array2; + Assert.AreEqual(value, value2, eps); + return; + } var array1 = np.ones_like(array2) * value; - Assert.IsTrue(np.allclose(array1, array2, rtol: eps)); + CollectionAssert.AreEqual(array1.ToArray(), array2.ToArray(), new CollectionComparer(eps)); + + //TODO: Assert.IsTrue(np.allclose(array1, array2, rtol: eps)); } private class CollectionComparer : IComparer @@ -158,7 +168,7 @@ public int Compare(object? x, object? y) } else if (x == null) { - return -1; + return -1; } else if (y == null) { From d54f7a62e0e66dee73eff78ce5c93acb195ce813 Mon Sep 17 00:00:00 2001 From: Alexander Date: Mon, 13 Nov 2023 10:33:14 +0000 Subject: [PATCH 62/77] test: more gradients tests --- .../Training/GradientDescentOptimizerTests.cs | 113 ++++++++++++++++++ test/Tensorflow.UnitTest/PythonTest.cs | 45 +++++-- 2 files changed, 149 insertions(+), 9 deletions(-) diff --git a/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs b/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs index d766890b2..f7062f00d 100644 --- a/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs +++ b/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs @@ -1,5 +1,6 @@ using Microsoft.VisualStudio.TestTools.UnitTesting; using System; +using System.Linq; using Tensorflow; using Tensorflow.NumPy; using static Tensorflow.Binding; @@ -67,6 +68,51 @@ public void TestBasic() TestBasic(); } + private void TestMinimizeResourceVariable() where T : struct + { + var dtype = GetTypeForNumericType(); + + // train.GradientDescentOptimizer is V1 only API. + tf.Graph().as_default(); + using (var sess = self.cached_session()) + { + var var0 = tf.Variable(new[,] { { 1.0f, 2.0f } }, dtype: dtype); + var var1 = tf.Variable(new[] { 3.0 }, dtype: dtype); + var x = tf.constant(new[,] { { 4.0f }, { 5.0f } }, dtype: dtype); + + var pred = math_ops.matmul(var0, x) + var1; + var loss = pred * pred; + var sgd_op = tf.train.GradientDescentOptimizer(3.0f).minimize(loss); + + var global_variables = tf.global_variables_initializer(); + sess.run(global_variables); + + sess.run(new[] { var0, var1 }); + // Fetch params to validate initial values + self.assertAllCloseAccordingToType(new[,] { { 1.0, 2.0 } }, self.evaluate(var0)); + self.assertAllCloseAccordingToType(new[] { 3.0 }, self.evaluate(var1)); + // Run 1 step of sgd + sgd_op.run(); + // Validate updated params + var np_pred = 1.0 * 4.0 + 2.0 * 5.0 + 3.0; + var np_grad = 2 * np_pred; + self.assertAllCloseAccordingToType( + new[,] { { 1.0 - np_grad * 4.0, 2.0 - np_grad * 5.0 } }, + self.evaluate(var0)); + self.assertAllCloseAccordingToType( + new[] { 3.0 - np_grad }, + self.evaluate(var1)); + } + } + + [TestMethod] + public void TestMinimizeResourceVariable() + { + //TODO: add np.half + TestMinimizeResourceVariable(); + TestMinimizeResourceVariable(); + } + private void TestTensorLearningRate() where T : struct { var dtype = GetTypeForNumericType(); @@ -115,5 +161,72 @@ public void TestTensorLearningRate() TestTensorLearningRate(); TestTensorLearningRate(); } + + public void TestGradWrtRef() where T : struct + { + var dtype = GetTypeForNumericType(); + + var graph = tf.Graph().as_default(); + using (var sess = self.cached_session()) + { + var opt = tf.train.GradientDescentOptimizer(3.0f); + var values = new[] { 1.0, 3.0 }; + var vars_ = values.Select( + v => tf.Variable(new[] { v }, dtype: dtype) as IVariableV1 + 
).ToList(); + var grads_and_vars = opt.compute_gradients(tf.add(vars_[0], vars_[1]), vars_); + sess.run(tf.global_variables_initializer()); + foreach (var (grad, _) in grads_and_vars) + self.assertAllCloseAccordingToType(new[] { 1.0 }, self.evaluate(grad)); + + } + } + + [TestMethod] + public void TestGradWrtRef() + { + TestGradWrtRef(); + TestGradWrtRef(); + } + + public void TestWithGlobalStep() where T : struct + { + var dtype = GetTypeForNumericType(); + + tf.Graph().as_default(); + using (var sess = self.cached_session()) + { + var global_step = tf.Variable(0, trainable: false); + var var0 = tf.Variable(new[] { 1.0, 2.0 }, dtype: dtype); + var var1 = tf.Variable(new[] { 3.0, 4.0 }, dtype: dtype); + var grads0 = tf.constant(new[] { 0.1, 0.1 }, dtype: dtype); + var grads1 = tf.constant(new[] { 0.01, 0.01 }, dtype: dtype); + var grads_and_vars = new[] { + Tuple.Create(grads0, var0 as IVariableV1), + Tuple.Create(grads1, var1 as IVariableV1) + }; + var sgd_op = tf.train.GradientDescentOptimizer(3.0f) + .apply_gradients(grads_and_vars, global_step: global_step); + + sess.run(tf.global_variables_initializer()); + // Fetch params to validate initial values + self.assertAllCloseAccordingToType(new[] { 1.0, 2.0 }, self.evaluate(var0)); + self.assertAllCloseAccordingToType(new[] { 3.0, 4.0 }, self.evaluate(var1)); + // Run 1 step of sgd + sgd_op.run(); + // Validate updated params and global_step + self.assertAllCloseAccordingToType(new[] { 1.0 - 3.0 * 0.1, 2.0 - 3.0 * 0.1 }, self.evaluate(var0)); + self.assertAllCloseAccordingToType(new[] { 3.0 - 3.0 * 0.01, 4.0 - 3.0 * 0.01 }, self.evaluate(var1)); + Assert.AreEqual(1, self.evaluate(global_step)); + } + + } + + [TestMethod] + public void TestWithGlobalStep() + { + TestWithGlobalStep(); + TestWithGlobalStep(); + } } } diff --git a/test/Tensorflow.UnitTest/PythonTest.cs b/test/Tensorflow.UnitTest/PythonTest.cs index dff652933..1ccd39f02 100644 --- a/test/Tensorflow.UnitTest/PythonTest.cs +++ b/test/Tensorflow.UnitTest/PythonTest.cs @@ -175,8 +175,8 @@ public int Compare(object? x, object? y) return 1; } - var a = (double)x; - var b = (double)y; + var a = Convert.ToDouble(x); + var b = Convert.ToDouble(y); double delta = Math.Abs(a - b); if (delta < _epsilon) @@ -187,6 +187,19 @@ public int Compare(object? x, object? 
y) } } + public void assertAllCloseAccordingToType( + double[,] expected, + T[,] given, + double eps = 1e-6, + float float_eps = 1e-6f) + { + Assert.AreEqual(expected.GetLength(0), given.GetLength(0)); + Assert.AreEqual(expected.GetLength(1), given.GetLength(1)); + + var flattenGiven = given.Cast().ToArray(); + assertAllCloseAccordingToType(expected, flattenGiven, eps, float_eps); + } + public void assertAllCloseAccordingToType( ICollection expected, ICollection given, @@ -267,21 +280,35 @@ public T evaluate(Tensor tensor) { var sess = tf.get_default_session(); var ndarray = tensor.eval(sess); - if (typeof(T) == typeof(double) - || typeof(T) == typeof(float) - || typeof(T) == typeof(int)) + + if (typeof(T) == typeof(int)) + { + int i = ndarray; + result = i; + } + else if (typeof(T) == typeof(float)) + { + float f = ndarray; + result = f; + } + else if (typeof(T) == typeof(double)) { - result = Convert.ChangeType(ndarray, typeof(T)); + double d = ndarray; + result = d; } - else if (typeof(T) == typeof(double[])) + else if ( + typeof(T) == typeof(double[]) + || typeof(T) == typeof(double[,])) { result = ndarray.ToMultiDimArray(); } - else if (typeof(T) == typeof(float[])) + else if (typeof(T) == typeof(float[]) + || typeof(T) == typeof(float[,])) { result = ndarray.ToMultiDimArray(); } - else if (typeof(T) == typeof(int[])) + else if (typeof(T) == typeof(int[]) + || typeof(T) == typeof(int[,])) { result = ndarray.ToMultiDimArray(); } From eb0f02577290d930930349870b161e85553e967a Mon Sep 17 00:00:00 2001 From: barfeous Date: Mon, 12 Feb 2024 13:28:54 -0600 Subject: [PATCH 63/77] avoid modifying collection --- .../Training/Saving/SavedModel/AugmentedGraphView.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/TensorFlowNET.Core/Training/Saving/SavedModel/AugmentedGraphView.cs b/src/TensorFlowNET.Core/Training/Saving/SavedModel/AugmentedGraphView.cs index a91933357..c6b26ff49 100644 --- a/src/TensorFlowNET.Core/Training/Saving/SavedModel/AugmentedGraphView.cs +++ b/src/TensorFlowNET.Core/Training/Saving/SavedModel/AugmentedGraphView.cs @@ -88,7 +88,7 @@ private ConcreteFunction maybe_uncache_variable_captures(ConcreteFunction concre public override (IList, IDictionary>) breadth_first_traversal() { - Trackable get_merged_trackable(Trackable x) + void merged_trackable(Trackable x) { // TODO: complete it with new definitions `Asset` and `TrackableConstant`. return x; @@ -100,7 +100,7 @@ Trackable get_merged_trackable(Trackable x) // skip the deletion of cache (maybe do it later). 
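+                // Avoid assigning back into _children_cache[obj] here: that would modify the dictionary while it is being enumerated.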
             foreach(var pair in _children_cache[obj])
             {
-                _children_cache[obj][pair.Key] = get_merged_trackable(pair.Value);
+                merged_trackable(pair.Value);
             }
         }

From 3448b6434680270026a0f938e913ff1f08f1df9b Mon Sep 17 00:00:00 2001
From: barfeous
Date: Wed, 14 Feb 2024 20:25:15 -0600
Subject: [PATCH 64/77] Remove parameter return from newly void local method

---
 .../Training/Saving/SavedModel/AugmentedGraphView.cs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/TensorFlowNET.Core/Training/Saving/SavedModel/AugmentedGraphView.cs b/src/TensorFlowNET.Core/Training/Saving/SavedModel/AugmentedGraphView.cs
index c6b26ff49..3b4bbdc63 100644
--- a/src/TensorFlowNET.Core/Training/Saving/SavedModel/AugmentedGraphView.cs
+++ b/src/TensorFlowNET.Core/Training/Saving/SavedModel/AugmentedGraphView.cs
@@ -91,8 +91,8 @@ public override (IList<Trackable>, IDictionary<Trackable, IDictionary<string, Trackable>>) breadth_first_traversal()
         void merged_trackable(Trackable x)
         {
             // TODO: complete it with new definitions `Asset` and `TrackableConstant`.
-            return x;
+            return;
         }

Date: Mon, 11 Mar 2024 03:05:42 +0800
Subject: [PATCH 65/77] docs: update README.md

---
 README.md | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/README.md b/README.md
index 0198c873c..75cad0aa7 100644
--- a/README.md
+++ b/README.md
@@ -15,6 +15,14 @@ English | [中文](docs/README-CN.md)

+> [!IMPORTANT]
+> We're happy that our work on tensorflow.net has attracted many users. However, at this time, none of the main maintainers of this repo is available for new features and bug fixes. We won't refuse PRs and will help to review them.
+>
+> If you would like to become a contributor or maintainer of tensorflow.net, we'd like to help you get started.
+>
+> We're sorry about that, and we'll resume maintaining this project once one of us has bandwidth for it.
+>
+
 *master branch and v0.100.x is corresponding to tensorflow v2.10, v0.6x branch is from tensorflow v2.6, v0.15-tensorflow1.15 is from tensorflow1.15.
Please add `https://www.myget.org/F/scisharp/api/v3/index.json` to nuget source to use nightly release.* From 4a31621a5632c7d6b2ebca1d36561458b91367c5 Mon Sep 17 00:00:00 2001 From: barfeous Date: Sun, 28 Apr 2024 13:04:07 -0500 Subject: [PATCH 66/77] Use TryGetValue instead of ContainsKey + [] --- .../Training/Saving/SavedModel/AugmentedGraphView.cs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/TensorFlowNET.Core/Training/Saving/SavedModel/AugmentedGraphView.cs b/src/TensorFlowNET.Core/Training/Saving/SavedModel/AugmentedGraphView.cs index 3b4bbdc63..9d0b3f001 100644 --- a/src/TensorFlowNET.Core/Training/Saving/SavedModel/AugmentedGraphView.cs +++ b/src/TensorFlowNET.Core/Training/Saving/SavedModel/AugmentedGraphView.cs @@ -109,15 +109,11 @@ void merged_trackable(Trackable x) public List<(string, Trackable)> list_dependencies(Trackable obj) { - IDictionary children; - if (!_children_cache.ContainsKey(obj)) + if (!_children_cache.TryGetValue(obj, out var children)) { children= new Dictionary(); } - else - { - children= _children_cache[obj]; - } + List<(string, Trackable)> res = new(); foreach(var pair in obj.deserialization_dependencies(children)) { From f5ba382e49ab0132308739c219ea09b6ac254223 Mon Sep 17 00:00:00 2001 From: Schoen Tannenbaum <169845314+SchoenTannenbaum@users.noreply.github.com> Date: Mon, 20 May 2024 12:09:06 -0400 Subject: [PATCH 67/77] Regularizer addition and fixes --- .../Keras/Regularizers/IRegularizer.cs | 17 ++++-- .../CustomizedRegularizerJsonConverter.cs | 57 +++++++++++++++++++ .../Operations/Regularizers/L1.cs | 33 +++++++++++ .../Operations/Regularizers/L1L2.cs | 48 ++++++++++++++++ .../Operations/Regularizers/L2.cs | 33 +++++++++++ src/TensorFlowNET.Keras/Regularizers.cs | 19 +++++-- src/TensorFlowNET.Keras/Regularizers/L1.cs | 19 ------- src/TensorFlowNET.Keras/Regularizers/L1L2.cs | 24 -------- src/TensorFlowNET.Keras/Regularizers/L2.cs | 17 ------ 9 files changed, 198 insertions(+), 69 deletions(-) create mode 100644 src/TensorFlowNET.Core/Keras/Saving/Json/CustomizedRegularizerJsonConverter.cs create mode 100644 src/TensorFlowNET.Core/Operations/Regularizers/L1.cs create mode 100644 src/TensorFlowNET.Core/Operations/Regularizers/L1L2.cs create mode 100644 src/TensorFlowNET.Core/Operations/Regularizers/L2.cs delete mode 100644 src/TensorFlowNET.Keras/Regularizers/L1.cs delete mode 100644 src/TensorFlowNET.Keras/Regularizers/L1L2.cs delete mode 100644 src/TensorFlowNET.Keras/Regularizers/L2.cs diff --git a/src/TensorFlowNET.Core/Keras/Regularizers/IRegularizer.cs b/src/TensorFlowNET.Core/Keras/Regularizers/IRegularizer.cs index f4045c7b2..e5de76ddb 100644 --- a/src/TensorFlowNET.Core/Keras/Regularizers/IRegularizer.cs +++ b/src/TensorFlowNET.Core/Keras/Regularizers/IRegularizer.cs @@ -1,7 +1,16 @@ -namespace Tensorflow.Keras +using Newtonsoft.Json; +using System.Collections.Generic; +using Tensorflow.Keras.Saving.Common; + +namespace Tensorflow.Keras { - public interface IRegularizer - { - Tensor Apply(RegularizerArgs args); + [JsonConverter(typeof(CustomizedRegularizerJsonConverter))] + public interface IRegularizer + { + [JsonProperty("class_name")] + string ClassName { get; } + [JsonProperty("config")] + IDictionary Config { get; } + Tensor Apply(RegularizerArgs args); } } diff --git a/src/TensorFlowNET.Core/Keras/Saving/Json/CustomizedRegularizerJsonConverter.cs b/src/TensorFlowNET.Core/Keras/Saving/Json/CustomizedRegularizerJsonConverter.cs new file mode 100644 index 000000000..4b1790aca --- /dev/null +++ 
b/src/TensorFlowNET.Core/Keras/Saving/Json/CustomizedRegularizerJsonConverter.cs @@ -0,0 +1,57 @@ +using Newtonsoft.Json.Linq; +using Newtonsoft.Json; +using System; +using System.Collections.Generic; +using System.Text; +using Tensorflow.Operations.Regularizers; + +namespace Tensorflow.Keras.Saving.Common +{ + class RegularizerInfo + { + public string class_name { get; set; } + public JObject config { get; set; } + } + + public class CustomizedRegularizerJsonConverter : JsonConverter + { + public override bool CanConvert(Type objectType) + { + return objectType == typeof(IRegularizer); + } + + public override bool CanRead => true; + + public override bool CanWrite => true; + + public override void WriteJson(JsonWriter writer, object? value, JsonSerializer serializer) + { + var regularizer = value as IRegularizer; + if (regularizer is null) + { + JToken.FromObject(null).WriteTo(writer); + return; + } + JToken.FromObject(new RegularizerInfo() + { + class_name = regularizer.ClassName, + config = JObject.FromObject(regularizer.Config) + }, serializer).WriteTo(writer); + } + + public override object? ReadJson(JsonReader reader, Type objectType, object? existingValue, JsonSerializer serializer) + { + var info = serializer.Deserialize(reader); + if (info is null) + { + return null; + } + return info.class_name switch + { + "L1L2" => new L1L2 (info.config["l1"].ToObject(), info.config["l2"].ToObject()), + "L1" => new L1(info.config["l1"].ToObject()), + "L2" => new L2(info.config["l2"].ToObject()), + }; + } + } +} diff --git a/src/TensorFlowNET.Core/Operations/Regularizers/L1.cs b/src/TensorFlowNET.Core/Operations/Regularizers/L1.cs new file mode 100644 index 000000000..8a5c68895 --- /dev/null +++ b/src/TensorFlowNET.Core/Operations/Regularizers/L1.cs @@ -0,0 +1,33 @@ +using System; + +using Tensorflow.Keras; + +namespace Tensorflow.Operations.Regularizers +{ + public class L1 : IRegularizer + { + float _l1; + private readonly Dictionary _config; + + public string ClassName => "L2"; + public virtual IDictionary Config => _config; + + public L1(float l1 = 0.01f) + { + // l1 = 0.01 if l1 is None else l1 + // validate_float_arg(l1, name = "l1") + // self.l1 = ops.convert_to_tensor(l1) + this._l1 = l1; + + _config = new(); + _config["l1"] = _l1; + } + + + public Tensor Apply(RegularizerArgs args) + { + //return self.l1 * ops.sum(ops.absolute(x)) + return _l1 * math_ops.reduce_sum(math_ops.abs(args.X)); + } + } +} diff --git a/src/TensorFlowNET.Core/Operations/Regularizers/L1L2.cs b/src/TensorFlowNET.Core/Operations/Regularizers/L1L2.cs new file mode 100644 index 000000000..e3af00eb5 --- /dev/null +++ b/src/TensorFlowNET.Core/Operations/Regularizers/L1L2.cs @@ -0,0 +1,48 @@ +using System; + +using Tensorflow.Keras; + +namespace Tensorflow.Operations.Regularizers +{ + public class L1L2 : IRegularizer + { + float _l1; + float _l2; + private readonly Dictionary _config; + + public string ClassName => "L1L2"; + public virtual IDictionary Config => _config; + + public L1L2(float l1 = 0.0f, float l2 = 0.0f) + { + //l1 = 0.0 if l1 is None else l1 + //l2 = 0.0 if l2 is None else l2 + // validate_float_arg(l1, name = "l1") + // validate_float_arg(l2, name = "l2") + + // self.l1 = l1 + // self.l2 = l2 + this._l1 = l1; + this._l2 = l2; + + _config = new(); + _config["l1"] = l1; + _config["l2"] = l2; + } + + public Tensor Apply(RegularizerArgs args) + { + //regularization = ops.convert_to_tensor(0.0, dtype = x.dtype) + //if self.l1: + // regularization += self.l1 * ops.sum(ops.absolute(x)) + //if self.l2: + // 
regularization += self.l2 * ops.sum(ops.square(x)) + //return regularization + + Tensor regularization = tf.constant(0.0, args.X.dtype); + regularization += _l1 * math_ops.reduce_sum(math_ops.abs(args.X)); + regularization += _l2 * math_ops.reduce_sum(math_ops.square(args.X)); + return regularization; + } + } +} diff --git a/src/TensorFlowNET.Core/Operations/Regularizers/L2.cs b/src/TensorFlowNET.Core/Operations/Regularizers/L2.cs new file mode 100644 index 000000000..6c0e950a9 --- /dev/null +++ b/src/TensorFlowNET.Core/Operations/Regularizers/L2.cs @@ -0,0 +1,33 @@ +using System; + +using Tensorflow.Keras; + +namespace Tensorflow.Operations.Regularizers +{ + public class L2 : IRegularizer + { + float _l2; + private readonly Dictionary _config; + + public string ClassName => "L2"; + public virtual IDictionary Config => _config; + + public L2(float l2 = 0.01f) + { + // l2 = 0.01 if l2 is None else l2 + // validate_float_arg(l2, name = "l2") + // self.l2 = l2 + this._l2 = l2; + + _config = new(); + _config["l2"] = _l2; + } + + + public Tensor Apply(RegularizerArgs args) + { + //return self.l2 * ops.sum(ops.square(x)) + return _l2 * math_ops.reduce_sum(math_ops.square(args.X)); + } + } +} diff --git a/src/TensorFlowNET.Keras/Regularizers.cs b/src/TensorFlowNET.Keras/Regularizers.cs index 98da27a7f..9c6d07ca6 100644 --- a/src/TensorFlowNET.Keras/Regularizers.cs +++ b/src/TensorFlowNET.Keras/Regularizers.cs @@ -1,8 +1,17 @@ namespace Tensorflow.Keras { - public class Regularizers - { - public IRegularizer l2(float l2 = 0.01f) - => new L2(l2); - } + public class Regularizers + { + public IRegularizer l1(float l1 = 0.01f) + => new Tensorflow.Operations.Regularizers.L1(l1); + public IRegularizer l2(float l2 = 0.01f) + => new Tensorflow.Operations.Regularizers.L2(l2); + + //From TF source + //# The default value for l1 and l2 are different from the value in l1_l2 + //# for backward compatibility reason. Eg, L1L2(l2=0.1) will only have l2 + //# and no l1 penalty. 
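+        // e.g. keras.regularizers.l1l2(l2: 0.1f) therefore applies only an L2 penalty, since l1 defaults to 0 here (unlike l1()/l2(), which default to 0.01).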
+ public IRegularizer l1l2(float l1 = 0.00f, float l2 = 0.00f) + => new Tensorflow.Operations.Regularizers.L1L2(l1, l2); + } } diff --git a/src/TensorFlowNET.Keras/Regularizers/L1.cs b/src/TensorFlowNET.Keras/Regularizers/L1.cs deleted file mode 100644 index 0f904b6f9..000000000 --- a/src/TensorFlowNET.Keras/Regularizers/L1.cs +++ /dev/null @@ -1,19 +0,0 @@ -using System; - -namespace Tensorflow.Keras -{ - public class L1 : IRegularizer - { - float l1; - - public L1(float l1 = 0.01f) - { - this.l1 = l1; - } - - public Tensor Apply(RegularizerArgs args) - { - return l1 * math_ops.reduce_sum(math_ops.abs(args.X)); - } - } -} diff --git a/src/TensorFlowNET.Keras/Regularizers/L1L2.cs b/src/TensorFlowNET.Keras/Regularizers/L1L2.cs deleted file mode 100644 index f619f1582..000000000 --- a/src/TensorFlowNET.Keras/Regularizers/L1L2.cs +++ /dev/null @@ -1,24 +0,0 @@ -using System; -using static Tensorflow.Binding; -namespace Tensorflow.Keras -{ - public class L1L2 : IRegularizer - { - float l1; - float l2; - - public L1L2(float l1 = 0.0f, float l2 = 0.0f) - { - this.l1 = l1; - this.l2 = l2; - - } - public Tensor Apply(RegularizerArgs args) - { - Tensor regularization = tf.constant(0.0, args.X.dtype); - regularization += l1 * math_ops.reduce_sum(math_ops.abs(args.X)); - regularization += l2 * math_ops.reduce_sum(math_ops.square(args.X)); - return regularization; - } - } -} diff --git a/src/TensorFlowNET.Keras/Regularizers/L2.cs b/src/TensorFlowNET.Keras/Regularizers/L2.cs deleted file mode 100644 index 034bbd236..000000000 --- a/src/TensorFlowNET.Keras/Regularizers/L2.cs +++ /dev/null @@ -1,17 +0,0 @@ -namespace Tensorflow.Keras -{ - public class L2 : IRegularizer - { - float l2; - - public L2(float l2 = 0.01f) - { - this.l2 = l2; - } - - public Tensor Apply(RegularizerArgs args) - { - return l2 * math_ops.reduce_sum(math_ops.square(args.X)); - } - } -} From 5f9fce572d07768de9c1386bf29264a345e16c8c Mon Sep 17 00:00:00 2001 From: Schoen Tannenbaum <169845314+SchoenTannenbaum@users.noreply.github.com> Date: Mon, 20 May 2024 12:10:09 -0400 Subject: [PATCH 68/77] RegularizerAPI and UnitTest --- .../Keras/Regularizers/IRegularizer.cs | 11 ++++- .../Operations/Regularizers/L1.cs | 2 +- src/TensorFlowNET.Keras/Regularizers.cs | 44 +++++++++++++++-- .../Model/ModelLoadTest.cs | 48 +++++++++++++++++++ 4 files changed, 98 insertions(+), 7 deletions(-) diff --git a/src/TensorFlowNET.Core/Keras/Regularizers/IRegularizer.cs b/src/TensorFlowNET.Core/Keras/Regularizers/IRegularizer.cs index e5de76ddb..06dbb7c8c 100644 --- a/src/TensorFlowNET.Core/Keras/Regularizers/IRegularizer.cs +++ b/src/TensorFlowNET.Core/Keras/Regularizers/IRegularizer.cs @@ -12,5 +12,14 @@ public interface IRegularizer [JsonProperty("config")] IDictionary Config { get; } Tensor Apply(RegularizerArgs args); - } + } + + public interface IRegularizerApi + { + IRegularizer GetRegularizerFromName(string name); + IRegularizer L1 { get; } + IRegularizer L2 { get; } + IRegularizer L1L2 { get; } + } + } diff --git a/src/TensorFlowNET.Core/Operations/Regularizers/L1.cs b/src/TensorFlowNET.Core/Operations/Regularizers/L1.cs index 8a5c68895..9e0619454 100644 --- a/src/TensorFlowNET.Core/Operations/Regularizers/L1.cs +++ b/src/TensorFlowNET.Core/Operations/Regularizers/L1.cs @@ -9,7 +9,7 @@ public class L1 : IRegularizer float _l1; private readonly Dictionary _config; - public string ClassName => "L2"; + public string ClassName => "L1"; public virtual IDictionary Config => _config; public L1(float l1 = 0.01f) diff --git 
a/src/TensorFlowNET.Keras/Regularizers.cs b/src/TensorFlowNET.Keras/Regularizers.cs index 9c6d07ca6..73b72a051 100644 --- a/src/TensorFlowNET.Keras/Regularizers.cs +++ b/src/TensorFlowNET.Keras/Regularizers.cs @@ -1,17 +1,51 @@ -namespace Tensorflow.Keras +using Tensorflow.Operations.Regularizers; + +namespace Tensorflow.Keras { - public class Regularizers + public class Regularizers: IRegularizerApi { + private static Dictionary _nameActivationMap; + public IRegularizer l1(float l1 = 0.01f) - => new Tensorflow.Operations.Regularizers.L1(l1); + => new L1(l1); public IRegularizer l2(float l2 = 0.01f) - => new Tensorflow.Operations.Regularizers.L2(l2); + => new L2(l2); //From TF source //# The default value for l1 and l2 are different from the value in l1_l2 //# for backward compatibility reason. Eg, L1L2(l2=0.1) will only have l2 //# and no l1 penalty. public IRegularizer l1l2(float l1 = 0.00f, float l2 = 0.00f) - => new Tensorflow.Operations.Regularizers.L1L2(l1, l2); + => new L1L2(l1, l2); + + static Regularizers() + { + _nameActivationMap = new Dictionary(); + _nameActivationMap["L1"] = new L1(); + _nameActivationMap["L1"] = new L2(); + _nameActivationMap["L1"] = new L1L2(); + } + + public IRegularizer L1 => l1(); + + public IRegularizer L2 => l2(); + + public IRegularizer L1L2 => l1l2(); + + public IRegularizer GetRegularizerFromName(string name) + { + if (name == null) + { + throw new Exception($"Regularizer name cannot be null"); + } + if (!_nameActivationMap.TryGetValue(name, out var res)) + { + throw new Exception($"Regularizer {name} not found"); + } + else + { + return res; + } + } } } diff --git a/test/TensorFlowNET.Keras.UnitTest/Model/ModelLoadTest.cs b/test/TensorFlowNET.Keras.UnitTest/Model/ModelLoadTest.cs index 53a67cbfa..c733537e7 100644 --- a/test/TensorFlowNET.Keras.UnitTest/Model/ModelLoadTest.cs +++ b/test/TensorFlowNET.Keras.UnitTest/Model/ModelLoadTest.cs @@ -1,6 +1,7 @@ using Microsoft.VisualStudio.TestPlatform.Utilities; using Microsoft.VisualStudio.TestTools.UnitTesting; using Newtonsoft.Json.Linq; +using System.Collections.Generic; using System.Linq; using System.Xml.Linq; using Tensorflow.Keras.Engine; @@ -129,6 +130,53 @@ public void TestModelBeforeTF2_5() } + [TestMethod] + public void BiasRegularizerSaveAndLoad() + { + var savemodel = keras.Sequential(new List() + { + tf.keras.layers.InputLayer((227, 227, 3)), + tf.keras.layers.Conv2D(96, (11, 11), (4, 4), activation:"relu", padding:"valid"), + tf.keras.layers.BatchNormalization(), + tf.keras.layers.MaxPooling2D((3, 3), strides:(2, 2)), + + tf.keras.layers.Conv2D(256, (5, 5), (1, 1), "same", activation: keras.activations.Relu, bias_regularizer:keras.regularizers.L1L2), + tf.keras.layers.BatchNormalization(), + + tf.keras.layers.Conv2D(256, (5, 5), (1, 1), "same", activation: keras.activations.Relu, bias_regularizer:keras.regularizers.L2), + tf.keras.layers.BatchNormalization(), + + tf.keras.layers.Conv2D(256, (5, 5), (1, 1), "same", activation: keras.activations.Relu, bias_regularizer:keras.regularizers.L1), + tf.keras.layers.BatchNormalization(), + tf.keras.layers.MaxPooling2D((3, 3), (2, 2)), + + tf.keras.layers.Flatten(), + + tf.keras.layers.Dense(1000, activation: "linear"), + tf.keras.layers.Softmax(1) + }); + + savemodel.compile(tf.keras.optimizers.Adam(), tf.keras.losses.SparseCategoricalCrossentropy(from_logits: true), new string[] { "accuracy" }); + + var num_epochs = 1; + var batch_size = 8; + + var trainDataset = new RandomDataSet(new Shape(227, 227, 3), 16); + + savemodel.fit(trainDataset.Data, 
trainDataset.Labels, batch_size, num_epochs); + + savemodel.save(@"./bias_regularizer_save_and_load", save_format: "tf"); + + var loadModel = tf.keras.models.load_model(@"./bias_regularizer_save_and_load"); + loadModel.summary(); + + loadModel.compile(tf.keras.optimizers.Adam(), tf.keras.losses.SparseCategoricalCrossentropy(from_logits: true), new string[] { "accuracy" }); + + var fitDataset = new RandomDataSet(new Shape(227, 227, 3), 16); + + loadModel.fit(fitDataset.Data, fitDataset.Labels, batch_size, num_epochs); + } + [TestMethod] public void CreateConcatenateModelSaveAndLoad() From b3ce158ec3304469bf776bc582b847e685a9df73 Mon Sep 17 00:00:00 2001 From: novikov-alexander <79649566+novikov-alexander@users.noreply.github.com> Date: Fri, 14 Jun 2024 14:40:06 +0300 Subject: [PATCH 69/77] Update tensor_util.cs --- src/TensorFlowNET.Core/Tensors/tensor_util.cs | 40 +++++++++++++------ 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/src/TensorFlowNET.Core/Tensors/tensor_util.cs b/src/TensorFlowNET.Core/Tensors/tensor_util.cs index f688d4d5d..f2003c9d4 100644 --- a/src/TensorFlowNET.Core/Tensors/tensor_util.cs +++ b/src/TensorFlowNET.Core/Tensors/tensor_util.cs @@ -1,4 +1,4 @@ -/***************************************************************************** +/***************************************************************************** Copyright 2018 The TensorFlow.NET Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); @@ -135,6 +135,23 @@ T[] ExpandArrayToSize(IList src) TF_DataType.TF_QINT32 }; + private static TOut[,] ConvertArray2D(TIn[,] inputArray, Func converter) + { + var rows = inputArray.GetLength(0); + var cols = inputArray.GetLength(1); + var outputArray = new TOut[rows, cols]; + + for (var i = 0; i < rows; i++) + { + for (var j = 0; j < cols; j++) + { + outputArray[i, j] = converter(inputArray[i, j]); + } + } + + return outputArray; + } + /// /// Create a TensorProto, invoked in graph mode /// @@ -157,19 +174,16 @@ public static TensorProto make_tensor_proto(object values, TF_DataType dtype = T else if(origin_dtype != dtype) { var new_system_dtype = dtype.as_system_dtype(); - if (values is long[] long_values) - { - if (dtype == TF_DataType.TF_INT32) - values = long_values.Select(x => (int)Convert.ChangeType(x, new_system_dtype)).ToArray(); - } - else if (values is double[] double_values) + + values = values switch { - if (dtype == TF_DataType.TF_FLOAT) - values = double_values.Select(x => (float)Convert.ChangeType(x, new_system_dtype)).ToArray(); - } - else - values = Convert.ChangeType(values, new_system_dtype); - + long[] longValues when dtype == TF_DataType.TF_INT32 => longValues.Select(x => (int)x).ToArray(), + float[] floatValues when dtype == TF_DataType.TF_DOUBLE => floatValues.Select(x => (double)x).ToArray(), + float[,] float2DValues when dtype == TF_DataType.TF_DOUBLE => ConvertArray2D(float2DValues, Convert.ToDouble), + double[] doubleValues when dtype == TF_DataType.TF_FLOAT => doubleValues.Select(x => (float)x).ToArray(), + double[,] double2DValues when dtype == TF_DataType.TF_DOUBLE => ConvertArray2D(double2DValues, Convert.ToSingle), + _ => Convert.ChangeType(values, new_system_dtype), + }; dtype = values.GetDataType(); } From 18db147eb40a07931e8421bbd63c64ce11edd558 Mon Sep 17 00:00:00 2001 From: novikov-alexander <79649566+novikov-alexander@users.noreply.github.com> Date: Fri, 14 Jun 2024 14:40:37 +0300 Subject: [PATCH 70/77] Update GradientDescentOptimizerTests.cs --- 
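The expected values in TestMinimizeResourceVariable assume a learning rate of
1.0: for example, var0[0] is checked against 1.0 - np_grad * 4.0, where
np_grad = 2 * (1.0*4.0 + 2.0*5.0 + 3.0) = 34 and no learning-rate factor is
applied. Constructing the optimizer with 3.0f made the single SGD step
disagree with those expectations.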
.../Training/GradientDescentOptimizerTests.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs b/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs index f7062f00d..3b53ff9cd 100644 --- a/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs +++ b/test/TensorFlowNET.UnitTest/Training/GradientDescentOptimizerTests.cs @@ -1,4 +1,4 @@ -using Microsoft.VisualStudio.TestTools.UnitTesting; +using Microsoft.VisualStudio.TestTools.UnitTesting; using System; using System.Linq; using Tensorflow; @@ -82,7 +82,7 @@ private void TestMinimizeResourceVariable() where T : struct var pred = math_ops.matmul(var0, x) + var1; var loss = pred * pred; - var sgd_op = tf.train.GradientDescentOptimizer(3.0f).minimize(loss); + var sgd_op = tf.train.GradientDescentOptimizer(1.0f).minimize(loss); var global_variables = tf.global_variables_initializer(); sess.run(global_variables); From 483ac82cd2db273c2c0520ce6923f5951638daba Mon Sep 17 00:00:00 2001 From: novikov-alexander <79649566+novikov-alexander@users.noreply.github.com> Date: Fri, 14 Jun 2024 15:02:17 +0300 Subject: [PATCH 71/77] Update tensor_util.cs --- src/TensorFlowNET.Core/Tensors/tensor_util.cs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/TensorFlowNET.Core/Tensors/tensor_util.cs b/src/TensorFlowNET.Core/Tensors/tensor_util.cs index f2003c9d4..873579e42 100644 --- a/src/TensorFlowNET.Core/Tensors/tensor_util.cs +++ b/src/TensorFlowNET.Core/Tensors/tensor_util.cs @@ -178,10 +178,15 @@ public static TensorProto make_tensor_proto(object values, TF_DataType dtype = T values = values switch { long[] longValues when dtype == TF_DataType.TF_INT32 => longValues.Select(x => (int)x).ToArray(), + long[] longValues => values, float[] floatValues when dtype == TF_DataType.TF_DOUBLE => floatValues.Select(x => (double)x).ToArray(), + float[] floatValues => values, float[,] float2DValues when dtype == TF_DataType.TF_DOUBLE => ConvertArray2D(float2DValues, Convert.ToDouble), + float[,] float2DValues => values, double[] doubleValues when dtype == TF_DataType.TF_FLOAT => doubleValues.Select(x => (float)x).ToArray(), - double[,] double2DValues when dtype == TF_DataType.TF_DOUBLE => ConvertArray2D(double2DValues, Convert.ToSingle), + double[] doubleValues => values, + double[,] double2DValues when dtype == TF_DataType.TF_FLOAT => ConvertArray2D(double2DValues, Convert.ToSingle), + double[,] double2DValues => values, _ => Convert.ChangeType(values, new_system_dtype), }; dtype = values.GetDataType(); From def57745b66d0537cdb70251584c940f327cd929 Mon Sep 17 00:00:00 2001 From: Alexander Novikov Date: Wed, 19 Jun 2024 12:30:38 +0300 Subject: [PATCH 72/77] fix: more generic array cast --- src/TensorFlowNET.Core/Tensors/tensor_util.cs | 88 +++++++++++++------ 1 file changed, 59 insertions(+), 29 deletions(-) diff --git a/src/TensorFlowNET.Core/Tensors/tensor_util.cs b/src/TensorFlowNET.Core/Tensors/tensor_util.cs index 873579e42..6e5024efd 100644 --- a/src/TensorFlowNET.Core/Tensors/tensor_util.cs +++ b/src/TensorFlowNET.Core/Tensors/tensor_util.cs @@ -67,7 +67,7 @@ public static NDArray MakeNdarray(TensorProto tensor) T[] ExpandArrayToSize(IList src) { - if(src.Count == 0) + if (src.Count == 0) { return new T[0]; } @@ -77,7 +77,7 @@ T[] ExpandArrayToSize(IList src) var first_elem = src[0]; var last_elem = src[src.Count - 1]; T[] res = new T[num_elements]; - for(long i = 0; i < num_elements; i++) + for (long i = 0; i < 
num_elements; i++) { if (i < pre) res[i] = first_elem; else if (i >= num_elements - after) res[i] = last_elem; @@ -121,7 +121,7 @@ T[] ExpandArrayToSize(IList src) $"https://www.tensorflow.org/api_docs/python/tf/dtypes%20for%20supported%20TF%20dtypes."); } - if(values.size == 0) + if (values.size == 0) { return np.zeros(shape, tensor_dtype); } @@ -135,23 +135,47 @@ T[] ExpandArrayToSize(IList src) TF_DataType.TF_QINT32 }; - private static TOut[,] ConvertArray2D(TIn[,] inputArray, Func converter) + private static Array ConvertArray(Array inputArray, Func converter) { - var rows = inputArray.GetLength(0); - var cols = inputArray.GetLength(1); - var outputArray = new TOut[rows, cols]; + if (inputArray == null) + throw new ArgumentNullException(nameof(inputArray)); - for (var i = 0; i < rows; i++) + var elementType = typeof(TOut); + var lengths = new int[inputArray.Rank]; + for (var i = 0; i < inputArray.Rank; i++) { - for (var j = 0; j < cols; j++) - { - outputArray[i, j] = converter(inputArray[i, j]); - } + lengths[i] = inputArray.GetLength(i); } + var outputArray = Array.CreateInstance(elementType, lengths); + + FillArray(inputArray, outputArray, converter, new int[inputArray.Rank], 0); + return outputArray; } + private static void FillArray(Array inputArray, Array outputArray, Func converter, int[] indices, int dimension) + { + if (dimension == inputArray.Rank - 1) + { + for (int i = 0; i < inputArray.GetLength(dimension); i++) + { + indices[dimension] = i; + var inputValue = (TIn)inputArray.GetValue(indices); + var convertedValue = converter(inputValue); + outputArray.SetValue(convertedValue, indices); + } + } + else + { + for (int i = 0; i < inputArray.GetLength(dimension); i++) + { + indices[dimension] = i; + FillArray(inputArray, outputArray, converter, indices, dimension + 1); + } + } + } + /// /// Create a TensorProto, invoked in graph mode /// @@ -171,24 +195,30 @@ public static TensorProto make_tensor_proto(object values, TF_DataType dtype = T var origin_dtype = values.GetDataType(); if (dtype == TF_DataType.DtInvalid) dtype = origin_dtype; - else if(origin_dtype != dtype) + else if (origin_dtype != dtype) { var new_system_dtype = dtype.as_system_dtype(); - - values = values switch + + if (dtype != TF_DataType.TF_STRING && dtype != TF_DataType.TF_VARIANT && dtype != TF_DataType.TF_RESOURCE) + { + if (values is Array arrayValues) + { + values = dtype switch + { + TF_DataType.TF_INT32 => ConvertArray(arrayValues, Convert.ToInt32), + TF_DataType.TF_FLOAT => ConvertArray(arrayValues, Convert.ToSingle), + TF_DataType.TF_DOUBLE => ConvertArray(arrayValues, Convert.ToDouble), + _ => values, + }; + } else + { + values = Convert.ChangeType(values, new_system_dtype); + } + + } else { - long[] longValues when dtype == TF_DataType.TF_INT32 => longValues.Select(x => (int)x).ToArray(), - long[] longValues => values, - float[] floatValues when dtype == TF_DataType.TF_DOUBLE => floatValues.Select(x => (double)x).ToArray(), - float[] floatValues => values, - float[,] float2DValues when dtype == TF_DataType.TF_DOUBLE => ConvertArray2D(float2DValues, Convert.ToDouble), - float[,] float2DValues => values, - double[] doubleValues when dtype == TF_DataType.TF_FLOAT => doubleValues.Select(x => (float)x).ToArray(), - double[] doubleValues => values, - double[,] double2DValues when dtype == TF_DataType.TF_FLOAT => ConvertArray2D(double2DValues, Convert.ToSingle), - double[,] double2DValues => values, - _ => Convert.ChangeType(values, new_system_dtype), - }; + + } dtype = values.GetDataType(); } @@ 
-306,7 +336,7 @@ bool hasattr(Graph property, string attr) if (tensor is EagerTensor eagerTensor) { - if(tensor.dtype == tf.int64) + if (tensor.dtype == tf.int64) return new Shape(tensor.ToArray()); else return new Shape(tensor.ToArray()); @@ -481,7 +511,7 @@ bool hasattr(Graph property, string attr) var d_ = new int[value.size]; foreach (var (index, d) in enumerate(value.ToArray())) d_[index] = d >= 0 ? d : -1; - + ret = ret.merge_with(new Shape(d_)); } return ret; From 5142ad658cf9233abd2c9fe727c2daeea84a88f6 Mon Sep 17 00:00:00 2001 From: Aleksej Solomatin Date: Sun, 30 Jun 2024 22:06:12 +0300 Subject: [PATCH 73/77] test: Added an `evaluate` method call to a unit test for a multi-input model. --- test/TensorFlowNET.Keras.UnitTest/MultiInputModelTest.cs | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/test/TensorFlowNET.Keras.UnitTest/MultiInputModelTest.cs b/test/TensorFlowNET.Keras.UnitTest/MultiInputModelTest.cs index dd8ef8f91..bb293bd90 100644 --- a/test/TensorFlowNET.Keras.UnitTest/MultiInputModelTest.cs +++ b/test/TensorFlowNET.Keras.UnitTest/MultiInputModelTest.cs @@ -54,6 +54,13 @@ public void LeNetModel() var x = new NDArray[] { x1, x2 }; model.fit(x, dataset.Train.Labels, batch_size: 8, epochs: 3); + x1 = x1["0:8"]; + x2 = x1; + + x = new NDArray[] { x1, x2 }; + var y = dataset.Train.Labels["0:8"]; + (model as Engine.Model).evaluate(x, y); + x1 = np.ones((1, 28, 28, 1), TF_DataType.TF_FLOAT); x2 = np.zeros((1, 28, 28, 1), TF_DataType.TF_FLOAT); var pred = model.predict((x1, x2)); From f8b7bdeb9b7fa10bf49b888934683f04febfc6e2 Mon Sep 17 00:00:00 2001 From: Aleksej Solomatin Date: Sun, 30 Jun 2024 22:43:01 +0300 Subject: [PATCH 74/77] test: Added a unit test of training a multi-input model using a dataset. --- .../MultiInputModelTest.cs | 75 +++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/test/TensorFlowNET.Keras.UnitTest/MultiInputModelTest.cs b/test/TensorFlowNET.Keras.UnitTest/MultiInputModelTest.cs index bb293bd90..54b76d41a 100644 --- a/test/TensorFlowNET.Keras.UnitTest/MultiInputModelTest.cs +++ b/test/TensorFlowNET.Keras.UnitTest/MultiInputModelTest.cs @@ -2,6 +2,7 @@ using System; using Tensorflow.Keras.Optimizers; using Tensorflow.NumPy; +using static Tensorflow.Binding; using static Tensorflow.KerasApi; namespace Tensorflow.Keras.UnitTest @@ -66,5 +67,79 @@ public void LeNetModel() var pred = model.predict((x1, x2)); Console.WriteLine(pred); } + + [TestMethod] + public void LeNetModelDataset() + { + var inputs = keras.Input((28, 28, 1)); + var conv1 = keras.layers.Conv2D(16, (3, 3), activation: "relu", padding: "same").Apply(inputs); + var pool1 = keras.layers.MaxPooling2D((2, 2), 2).Apply(conv1); + var conv2 = keras.layers.Conv2D(32, (3, 3), activation: "relu", padding: "same").Apply(pool1); + var pool2 = keras.layers.MaxPooling2D((2, 2), 2).Apply(conv2); + var flat1 = keras.layers.Flatten().Apply(pool2); + + var inputs_2 = keras.Input((28, 28, 1)); + var conv1_2 = keras.layers.Conv2D(16, (3, 3), activation: "relu", padding: "same").Apply(inputs_2); + var pool1_2 = keras.layers.MaxPooling2D((4, 4), 4).Apply(conv1_2); + var conv2_2 = keras.layers.Conv2D(32, (1, 1), activation: "relu", padding: "same").Apply(pool1_2); + var pool2_2 = keras.layers.MaxPooling2D((2, 2), 2).Apply(conv2_2); + var flat1_2 = keras.layers.Flatten().Apply(pool2_2); + + var concat = keras.layers.Concatenate().Apply((flat1, flat1_2)); + var dense1 = keras.layers.Dense(512, activation: "relu").Apply(concat); + var dense2 = keras.layers.Dense(128, activation: 
"relu").Apply(dense1); + var dense3 = keras.layers.Dense(10, activation: "relu").Apply(dense2); + var output = keras.layers.Softmax(-1).Apply(dense3); + + var model = keras.Model((inputs, inputs_2), output); + model.summary(); + + var data_loader = new MnistModelLoader(); + + var dataset = data_loader.LoadAsync(new ModelLoadSetting + { + TrainDir = "mnist", + OneHot = false, + ValidationSize = 59900, + }).Result; + + var loss = keras.losses.SparseCategoricalCrossentropy(); + var optimizer = new Adam(0.001f); + model.compile(optimizer, loss, new string[] { "accuracy" }); + + NDArray x1 = np.reshape(dataset.Train.Data, (dataset.Train.Data.shape[0], 28, 28, 1)); + + var multiInputDataset = tf.data.Dataset.zip( + tf.data.Dataset.from_tensor_slices(x1), + tf.data.Dataset.from_tensor_slices(x1), + tf.data.Dataset.from_tensor_slices(dataset.Train.Labels) + ).batch(8); + multiInputDataset.FirstInputTensorCount = 2; + + model.fit(multiInputDataset, epochs: 3); + + x1 = x1["0:8"]; + + multiInputDataset = tf.data.Dataset.zip( + tf.data.Dataset.from_tensor_slices(x1), + tf.data.Dataset.from_tensor_slices(x1), + tf.data.Dataset.from_tensor_slices(dataset.Train.Labels["0:8"]) + ).batch(8); + multiInputDataset.FirstInputTensorCount = 2; + + (model as Engine.Model).evaluate(multiInputDataset); + + x1 = np.ones((1, 28, 28, 1), TF_DataType.TF_FLOAT); + var x2 = np.zeros((1, 28, 28, 1), TF_DataType.TF_FLOAT); + + multiInputDataset = tf.data.Dataset.zip( + tf.data.Dataset.from_tensor_slices(x1), + tf.data.Dataset.from_tensor_slices(x2) + ).batch(8); + multiInputDataset.FirstInputTensorCount = 2; + + var pred = model.predict(multiInputDataset); + Console.WriteLine(pred); + } } } From 93dda17944b6e34380897ad3480ac2218fb7398e Mon Sep 17 00:00:00 2001 From: Aleksej Solomatin Date: Sun, 30 Jun 2024 22:44:03 +0300 Subject: [PATCH 75/77] fix: Added support for training a multi-input model using a dataset. 
--- src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs | 14 +++++++++++++- src/TensorFlowNET.Keras/Engine/Model.Fit.cs | 13 ++++++++++++- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs b/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs index b3264429e..ec99d7ef9 100644 --- a/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs +++ b/src/TensorFlowNET.Keras/Engine/Model.Evaluate.cs @@ -112,7 +112,19 @@ public Dictionary evaluate(IDatasetV2 x, int verbose = 1, bool is Steps = data_handler.Inferredsteps }); - return evaluate(data_handler, callbacks, is_val, test_function); + Func> testFunction; + + if (data_handler.DataAdapter.GetDataset().structure.Length > 2 || + data_handler.DataAdapter.GetDataset().FirstInputTensorCount > 1) + { + testFunction = test_step_multi_inputs_function; + } + else + { + testFunction = test_function; + } + + return evaluate(data_handler, callbacks, is_val, testFunction); } /// diff --git a/src/TensorFlowNET.Keras/Engine/Model.Fit.cs b/src/TensorFlowNET.Keras/Engine/Model.Fit.cs index 13a1b63bc..e1303513e 100644 --- a/src/TensorFlowNET.Keras/Engine/Model.Fit.cs +++ b/src/TensorFlowNET.Keras/Engine/Model.Fit.cs @@ -179,9 +179,20 @@ public ICallback fit(IDatasetV2 dataset, StepsPerExecution = _steps_per_execution }); + Func> trainStepFunction; + + if (data_handler.DataAdapter.GetDataset().structure.Length > 2 || + data_handler.DataAdapter.GetDataset().FirstInputTensorCount > 1) + { + trainStepFunction = train_step_multi_inputs_function; + } + else + { + trainStepFunction = train_step_function; + } return FitInternal(data_handler, epochs, validation_step, verbose, callbacks, validation_data: validation_data, - train_step_func: train_step_function); + train_step_func: trainStepFunction); } History FitInternal(DataHandler data_handler, int epochs, int validation_step, int verbose, List callbackList, IDatasetV2 validation_data, From b6c5d26fab9a5eab72c0c81c554fec8412d86771 Mon Sep 17 00:00:00 2001 From: Leonardo Doherty <73901464+eLDoherty@users.noreply.github.com> Date: Mon, 13 Jan 2025 23:29:04 -0500 Subject: [PATCH 76/77] fix: Resolve fixed-size array issue Replace .ToArray() with .ToList() to allow dynamic modification of network_nodes in MapGraphNetwork() Replaced .ToArray() with .ToList() to resolve the issue where .Add() was called on a fixed-size array. 
This prevents the "Collection was of a fixed size" error when calling
something like:

    var model = keras.Model(new Tensors(new Tensor[] { encoder_inputs, decoder_inputs }), outputs: decoder_dense);
---
 src/TensorFlowNET.Keras/Engine/Functional.cs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/TensorFlowNET.Keras/Engine/Functional.cs b/src/TensorFlowNET.Keras/Engine/Functional.cs
index 7347585f8..75854d82c 100644
--- a/src/TensorFlowNET.Keras/Engine/Functional.cs
+++ b/src/TensorFlowNET.Keras/Engine/Functional.cs
@@ -180,7 +180,7 @@ void ComputeTensorUsageCount()
             var (nodes_in_decreasing_depth, layer_indices) = BuildMap(outputs);
             var network_nodes = nodes_in_decreasing_depth
                 .Select(node => MakeNodeKey(node.Layer.Name, node.Layer.InboundNodes.IndexOf(node)))
-                .ToArray();
+                .ToList();

             var nodes_depths = new Dictionary<INode, int>();
             var layers_depths = new Dictionary<ILayer, int>();
@@ -221,7 +221,7 @@ void ComputeTensorUsageCount()
                     layers_depths[input_layer] = 0;
                     layer_indices[input_layer] = -1;
                     nodes_depths[input_layer.InboundNodes[0]] = 0;
-                    network_nodes.add(MakeNodeKey(input_layer.Name, 0));
+                    network_nodes.Add(MakeNodeKey(input_layer.Name, 0));
                 }
             }

@@ -231,7 +231,7 @@ void ComputeTensorUsageCount()
             {
                 if (!nodes_by_depth.ContainsKey(depth))
                     nodes_by_depth[depth] = new List<INode>();
-                nodes_by_depth[depth].append(node);
+                nodes_by_depth[depth].Add(node);
             }

             var layers_by_depth = new Dictionary<int, List<ILayer>>();
@@ -239,7 +239,7 @@ void ComputeTensorUsageCount()
             {
                 if (!layers_by_depth.ContainsKey(depth))
                     layers_by_depth[depth] = new List<ILayer>();
-                layers_by_depth[depth].append(layer);
+                layers_by_depth[depth].Add(layer);
             }

             // Get sorted list of layer depths.
@@ -260,7 +260,7 @@ void ComputeTensorUsageCount()

             // Get sorted list of node depths.
             depth_keys = nodes_by_depth.Keys.OrderBy(x => x).Reverse();

-            return (network_nodes, nodes_by_depth, layers, layers_by_depth);
+            return (network_nodes.ToArray(), nodes_by_depth, layers, layers_by_depth);
         }

         string MakeNodeKey(string layer_name, int node_index)

From 6ce6066551ce80202119a121a05b006aadd9ef37 Mon Sep 17 00:00:00 2001
From: Haiping
Date: Wed, 22 Jan 2025 09:46:45 -0600
Subject: [PATCH 77/77] Update release.yml

---
 .github/workflows/release.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 8f862e329..02601764c 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -53,7 +53,7 @@ jobs:
         }

     - name: Upload packages artifacts
-      uses: actions/upload-artifact@v1.0.0
+      uses: actions/upload-artifact@v4.0.0
       with:
         name: "drop-ci-packages"
         path: './packages'