From 3bd48f9ac68feb088b1243db656aec21bc7e9fb5 Mon Sep 17 00:00:00 2001
From: CristianCuadrado
Date: Thu, 3 Sep 2020 17:52:47 +0100
Subject: [PATCH 1/8] add utils for deepecho models

---
 deepecho/models/utils.py | 232 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 232 insertions(+)
 create mode 100644 deepecho/models/utils.py

diff --git a/deepecho/models/utils.py b/deepecho/models/utils.py
new file mode 100644
index 0000000..fa5fdfd
--- /dev/null
+++ b/deepecho/models/utils.py
@@ -0,0 +1,232 @@
+"""Utils for models."""
+# pylint: disable-all
+
+import numpy as np
+import pandas as pd
+import torch
+
+
+def context_to_tensor_gan(context, context_size, context_map):
+    """Convert the input context to the corresponding tensor."""
+    tensor = torch.zeros(context_size)
+    for column, properties in context_map.items():
+        value = context[column]
+        value_to_tensor(tensor, value, properties)
+
+    return tensor
+
+
+def data_to_tensor_gan(data, model_data_size, data_map, max_sequence_length, fixed_length=None):
+    """Convert the input data to the corresponding tensor.
+
+    If ``self._fixed_length`` is ``False``, add a 1.0 to indicate
+    the sequence end and pad the rest of the sequence with 0.0s.
+    """
+    tensors = []
+    num_rows = len(data[0])
+    for row in range(num_rows):
+        tensor = torch.zeros(model_data_size)
+        for column, properties in data_map.items():
+            value = data[column][row]
+            value_to_tensor(tensor, value, properties)
+
+        tensors.append(tensor)
+
+    if not fixed_length:
+        tensors[-1][-1] = 1.0
+
+    for _ in range(max_sequence_length - num_rows):
+        tensors.append(torch.zeros(model_data_size))
+
+    return torch.stack(tensors, dim=0)
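To make the expected shapes concrete, here is a minimal usage sketch for the two helpers above (an illustration, not part of the patch; the maps are hand-built in the format that ``index_map`` below produces, and it assumes the stray ``@staticmethod`` decorators added further down have been dropped, which PATCH 3/8 does — module-level functions decorated that way are not callable before Python 3.10):

```python
import torch
from deepecho.models.utils import context_to_tensor_gan, data_to_tensor_gan

context_map = {0: {'type': 'continuous', 'min': 0.0, 'max': 10.0, 'indices': (0, 1)}}
data_map = {
    0: {'type': 'continuous', 'min': 1.0, 'max': 3.0, 'indices': (0, 1)},
    1: {'type': 'categorical', 'indices': {'a': 2, 'b': 3}},
}

context = context_to_tensor_gan([5.0], 2, context_map)
print(context)  # tensor([0., 0.]) -> 5.0 maps to 0.0 in [-1, 1], not missing

# 4 mapped dimensions plus 1 for the end-of-sequence flag
data = [[1.0, 2.0, 3.0], ['a', 'b', 'a']]
tensor = data_to_tensor_gan(data, 5, data_map, max_sequence_length=5)
print(tensor.shape)  # torch.Size([5, 5])
# row 2 carries the 1.0 end-of-sequence flag in its last dimension;
# rows 3-4 are zero padding
```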
+
+
+@staticmethod
+def denormalize(tensor, row, properties, round_value, std_norm=False):
+    """Denormalize previously normalized values, setting NaN values if necessary.
+
+    If 'std_norm' is True, denormalize from 0 and 1.
+    If 'std_norm' is False, denormalize from -1 and 1.
+    """
+    value_idx, missing_idx = properties['indices']
+    if tensor[row, 0, missing_idx] > 0.5:
+        return None
+
+    normalized = tensor[row, 0, value_idx].item()
+    column_min = properties['min']
+    column_range = properties['max'] - column_min
+
+    if std_norm:
+        denormalized = (normalized) * column_range + column_min
+    else:
+        denormalized = (normalized + 1) * column_range / 2.0 + column_min
+
+    if round_value:
+        denormalized = round(denormalized)
+
+    return denormalized
+
+
+@staticmethod
+def index_map(columns, types):
+    """Decide which dimension will store which column information in the tensor.
+
+    The output of this function has two elements:
+
+    - An idx_map, which is a dict that indicates the indexes at which
+      the list of tensor dimensions associated with each input column starts,
+      and the properties of such columns.
+    - An integer that indicates how many dimensions the tensor will have.
+
+    In order to decide this, the following process is followed for each column:
+
+    - If the column is numerical (continuous or count), 2 dimensions are created
+      for it. These will contain information about the value itself, as well
+      as information about whether the value should be NaN or not.
+    - If the column is categorical or ordinal, 1 dimension is created for
+      each possible value, which will later be used to hold one-hot encoding
+      information about the values.
+    """
+    dimensions = 0
+    mapping = {}
+    for column, column_type in enumerate(types):
+        values = columns[column]
+        if column_type in ('continuous', 'count'):
+            mapping[column] = {
+                'type': column_type,
+                'min': np.min(values),
+                'max': np.max(values),
+                'indices': (dimensions, dimensions + 1)
+            }
+            dimensions += 2
+
+        elif column_type in ('categorical', 'ordinal'):
+            indices = {}
+            for value in set(values):
+                indices[value] = dimensions
+                dimensions += 1
+
+            mapping[column] = {
+                'type': column_type,
+                'indices': indices
+            }
+
+        else:
+            raise ValueError('Unsupported type: {}'.format(column_type))
+
+    return mapping, dimensions
+
+
+@staticmethod
+def normalize(tensor, value, properties, std_norm=False):
+    """Normalize value and flag nans.
+
+    If 'std_norm' is True, normalize between 0 and 1.
+    If 'std_norm' is False, normalize between -1 and 1.
+    """
+    value_idx, missing_idx = properties['indices']
+    if pd.isnull(value):
+        tensor[value_idx] = 0.0
+        tensor[missing_idx] = 1.0
+    else:
+        column_min = properties['min']
+        column_range = properties['max'] - column_min
+        offset = value - column_min
+
+        if std_norm:
+            tensor[value_idx] = offset / column_range
+        else:
+            tensor[value_idx] = 2.0 * offset / column_range - 1.0
+
+        tensor[missing_idx] = 0.0
+
+    return tensor
+
+
+@staticmethod
+def one_hot_decode(tensor, row, properties):
+    """Obtain the category that corresponds to the highest one-hot value."""
+    max_value = float('-inf')
+    for category, idx in properties['indices'].items():
+        value = tensor[row, 0, idx]
+        if value > max_value:
+            max_value = value
+            selected = category
+
+    return selected
+
+
+@staticmethod
+def one_hot_encode(tensor, value, properties):
+    """Update the index that corresponds to the value to 1.0."""
+    value_index = properties['indices'][value]
+    tensor[value_index] = 1.0
+
+    return tensor
+
+
+def transform(data, data_map):
+    """Transform data."""
+    for properties in data_map.values():
+        column_type = properties['type']
+        if column_type in ('continuous', 'count'):
+            value_idx, missing_idx = properties['indices']
+            data[:, :, value_idx] = torch.tanh(data[:, :, value_idx])
+            data[:, :, missing_idx] = torch.sigmoid(data[:, :, missing_idx])
+        elif column_type in ('categorical', 'ordinal'):
+            indices = list(properties['indices'].values())
+            data[:, :, indices] = torch.nn.functional.gumbel_softmax(
+                data[:, :, indices], hard=True)
+
+    return data
+
+
+def truncate(generated, data_size):
+    """Truncate generated samples."""
+    end_flag = (generated[:, :, data_size] > 0.5).float().round()
+    generated[:, :, data_size] = end_flag
+
+    for sequence_idx in range(generated.shape[1]):
+        # Pad with zeroes after end_flag == 1
+        sequence = generated[:, sequence_idx]
+        end_flag = sequence[:, data_size]
+        if (end_flag == 1.0).any():
+            cut_idx = end_flag.detach().cpu().numpy().argmax()
+            sequence[cut_idx + 1:] = 0.0
+
+
+def tensor_to_data_gan(tensor, data_map):
+    """Rebuild a valid sequence from the given tensor."""
+    sequence_length, num_sequences, _ = tensor.shape
+    assert num_sequences == 1
+
+    data = [None] * len(data_map)
+    for column, properties in data_map.items():
+        column_type = properties['type']
+
+        column_data = []
+        data[column] = column_data
+        for row in range(sequence_length):
+            if column_type in ('continuous', 'count'):
+                round_value = column_type == 'count'
+                value = denormalize(tensor, row, properties, round_value=round_value)
+            elif column_type in ('categorical', 'ordinal'):
+                value = one_hot_decode(tensor, row, properties)
+            else:
+                raise ValueError()  # Theoretically unreachable
+
+            column_data.append(value)
+
+    return data
+
+
+def value_to_tensor(tensor, value, properties):
+    """Update the tensor according to the value and properties."""
+    column_type = properties['type']
+    if column_type in ('continuous', 'count'):
+        normalize(tensor, value, properties)
+    elif column_type in ('categorical', 'ordinal'):
+        one_hot_encode(tensor, value, properties)
+
+    else:
+        raise ValueError()  # Theoretically unreachable
From 3f4a7aa622af0f45a82781841489d0d3de069b83 Mon Sep 17 00:00:00 2001
From: CristianCuadrado
Date: Thu, 3 Sep 2020 17:57:15 +0100
Subject: [PATCH 2/8] fix _transform

---
 deepecho/models/basic_gan.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/deepecho/models/basic_gan.py b/deepecho/models/basic_gan.py
index f91fc0e..4378e10 100644
--- a/deepecho/models/basic_gan.py
+++ b/deepecho/models/basic_gan.py
@@ -406,7 +406,8 @@ def _transform(self, data):
             data[:, :, missing_idx] = torch.sigmoid(data[:, :, missing_idx])
         elif column_type in ('categorical', 'ordinal'):
             indices = list(properties['indices'].values())
-            data[:, :, indices] = torch.nn.functional.softmax(data[:, :, indices])
+            data[:, :, indices] = torch.nn.functional.gumbel_softmax(data[:, :, indices],
+                                                                     hard=True)
 
     return data
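The motivation for this fix, sketched as a small illustration (output values are examples; the Gumbel sample is random): ``softmax`` yields a dense probability vector that is never exactly one-hot, while ``gumbel_softmax(..., hard=True)`` emits a proper one-hot sample while remaining differentiable, which is what the one-hot decoding downstream expects:

```python
import torch

logits = torch.tensor([[1.0, 2.0, 0.5]])

soft = torch.nn.functional.softmax(logits, dim=-1)
print(soft)        # tensor([[0.2312, 0.6285, 0.1402]]) - dense, never one-hot

hard = torch.nn.functional.gumbel_softmax(logits, hard=True)
print(hard)        # e.g. tensor([[0., 1., 0.]]) - a sampled one-hot vector
print(hard.sum())  # tensor(1.)
```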
From fcc81c716cd734a50edee82978d495c9f5f00bfc Mon Sep 17 00:00:00 2001
From: CristianCuadrado
Date: Fri, 4 Sep 2020 12:55:14 +0100
Subject: [PATCH 3/8] add functions to utils

---
 deepecho/models/basic_gan.py |  15 +--
 deepecho/models/utils.py     | 192 ++++++++++++++--------------------
 2 files changed, 76 insertions(+), 131 deletions(-)

diff --git a/deepecho/models/basic_gan.py b/deepecho/models/basic_gan.py
index 4378e10..e494836 100644
--- a/deepecho/models/basic_gan.py
+++ b/deepecho/models/basic_gan.py
@@ -24,7 +24,6 @@ class BasicGenerator(torch.nn.Module):
 
     This generator consists of an RNN layer followed by a Linear layer with the
     following schema:
-
     - The Generator takes as input a ``sequence_length`` and a ``context`` vector.
    - The ``context`` vector is expanded over the ``sequence_length`` and padded
      with ``latent_size`` random noise.
    - ``context + noise`` is passed to the RNN, which
      generates an output of shape ``(sequence_length, context_length, hidden_size)``.
    - The RNN output is passed to the Linear layer that outputs a tensor of size
      ``(sequence_length, context_length, output_size)``
-
     Args:
         context_size (int):
             Size of the contextual arrays.
@@ -78,14 +76,12 @@ class BasicDiscriminator(torch.nn.Module):
 
     This discriminator consists of an RNN layer followed by a Linear layer with the
     following schema:
-
     - The Discriminator takes as input a collection of sequences that include
       both the data and the context columns.
    - RNN takes as input a tensor with shape ``(sequence_length,
      number_of_sequences, context_size + data_size)`` and generates an output
      of shape ``(sequence_length, num_sequences, hidden_size)``.
    - The RNN output is passed to the Linear layer that outputs a single value.
-
     Args:
         context_size (int):
             Number of values in the contextual arrays.
@@ -121,7 +117,6 @@ class BasicGANModel(DeepEcho):
        - apply sigmoid to continuous/count/datetime
        - apply softmax to categorical/ordinal
     4. Define a discriminator that takes sequence + context -> score.
-
     Args:
         epochs (int):
             Number of training epochs. Defaults to 1024.
@@ -196,14 +191,11 @@ def _index_map(columns, types):
         """Decide which dimension will store which column information in the tensor.
 
         The output of this function has two elements:
-
        - An idx_map, which is a dict that indicates the indexes at which
          the list of tensor dimensions associated with each input column starts,
          and the properties of such columns.
        - An integer that indicates how many dimensions the tensor will have.
-
        In order to decide this, the following process is followed for each column:
-
        - If the column is numerical (continuous or count), 2 dimensions are created
          for it. These will contain information about the value itself, as well
          as information about whether the value should be NaN or not.
        - If the column is categorical or ordinal, 1 dimension is created for
          each possible value, which will later be used to hold one-hot encoding
          information about the values.
         """
@@ -406,8 +398,7 @@ def _transform(self, data):
             data[:, :, missing_idx] = torch.sigmoid(data[:, :, missing_idx])
         elif column_type in ('categorical', 'ordinal'):
             indices = list(properties['indices'].values())
-            data[:, :, indices] = torch.nn.functional.gumbel_softmax(data[:, :, indices],
-                                                                     hard=True)
+            data[:, :, indices] = torch.nn.functional.softmax(data[:, :, indices])
 
     return data
@@ -492,7 +483,6 @@ def fit_sequences(self, sequences, context_types, data_types):
             List of sequences. Each sequence is a single training example
             (i.e. an example of a multivariate time series with some context).
             For example, a sequence might look something like::
-
                 {
                     "context": [1],
                     "data": [
                         [1, 3, 4, 5, 11, 3, 4],
                         [2, 2, 3, 4, 5, 1, 2],
                         [1, 3, 4, 5, 2, 3, 1]
                     ]
                 }
@@ -501,11 +491,9 @@ def fit_sequences(self, sequences, context_types, data_types):
-
             The "context" attribute maps to a list of variables which
             should be used for conditioning. These are variables which
             do not change over time.
-
             The "data" attribute contains a list of lists corresponding
             to the actual time series data such that `data[i][j]` contains
             the value at the jth time step of the ith channel of the
@@ -559,7 +547,6 @@ def sample_sequence(self, context, sequence_length=None):
             context (list):
                 The list of values to condition on. It must match
                 the types specified in context_types when fit was called.
-
         Returns:
             list[list]:
                 A list of lists (data) corresponding to the types specified
diff --git a/deepecho/models/utils.py b/deepecho/models/utils.py
index fa5fdfd..8da71f9 100644
--- a/deepecho/models/utils.py
+++ b/deepecho/models/utils.py
@@ -6,68 +6,6 @@
 import torch
 
 
-def context_to_tensor_gan(context, context_size, context_map):
-    """Convert the input context to the corresponding tensor."""
-    tensor = torch.zeros(context_size)
-    for column, properties in context_map.items():
-        value = context[column]
-        value_to_tensor(tensor, value, properties)
-
-    return tensor
-
-
-def data_to_tensor_gan(data, model_data_size, data_map, max_sequence_length, fixed_length=None):
-    """Convert the input data to the corresponding tensor.
-
-    If ``self._fixed_length`` is ``False``, add a 1.0 to indicate
-    the sequence end and pad the rest of the sequence with 0.0s.
-    """
-    tensors = []
-    num_rows = len(data[0])
-    for row in range(num_rows):
-        tensor = torch.zeros(model_data_size)
-        for column, properties in data_map.items():
-            value = data[column][row]
-            value_to_tensor(tensor, value, properties)
-
-        tensors.append(tensor)
-
-    if not fixed_length:
-        tensors[-1][-1] = 1.0
-
-    for _ in range(max_sequence_length - num_rows):
-        tensors.append(torch.zeros(model_data_size))
-
-    return torch.stack(tensors, dim=0)
-
-
-@staticmethod
-def denormalize(tensor, row, properties, round_value, std_norm=False):
-    """Denormalize previously normalized values, setting NaN values if necessary.
-
-    If 'std_norm' is True, denormalize from 0 and 1.
-    If 'std_norm' is False, denormalize from -1 and 1.
-    """
-    value_idx, missing_idx = properties['indices']
-    if tensor[row, 0, missing_idx] > 0.5:
-        return None
-
-    normalized = tensor[row, 0, value_idx].item()
-    column_min = properties['min']
-    column_range = properties['max'] - column_min
-
-    if std_norm:
-        denormalized = (normalized) * column_range + column_min
-    else:
-        denormalized = (normalized + 1) * column_range / 2.0 + column_min
-
-    if round_value:
-        denormalized = round(denormalized)
-
-    return denormalized
-
-
-@staticmethod
 def index_map(columns, types):
     """Decide which dimension will store which column information in the tensor.
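For reference, the two conventions that the removed ``std_norm`` flag used to toggle, written out as plain arithmetic (only the [-1, 1] variant survives from here on):

```python
value, vmin, vmax = 7.0, 2.0, 12.0
rng = vmax - vmin
offset = value - vmin

n01 = offset / rng              # std_norm=True:  0.5, in [0, 1]
n11 = 2.0 * offset / rng - 1.0  # std_norm=False: 0.0, in [-1, 1]

# The matching denormalizations recover the original value
assert n01 * rng + vmin == value
assert (n11 + 1) * rng / 2.0 + vmin == value
```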
- """ - value_idx, missing_idx = properties['indices'] - if tensor[row, 0, missing_idx] > 0.5: - return None - - normalized = tensor[row, 0, value_idx].item() - column_min = properties['min'] - column_range = properties['max'] - column_min - - if std_norm: - denormalized = (normalized) * column_range + column_min - else: - denormalized = (normalized + 1) * column_range / 2.0 + column_min - - if round_value: - denormalized = round(denormalized) - - return denormalized - - -@staticmethod def index_map(columns, types): """Decide which dimension will store which column information in the tensor. @@ -117,12 +55,10 @@ def index_map(columns, types): return mapping, dimensions -@staticmethod -def normalize(tensor, value, properties, std_norm=False): +def normalize(tensor, value, properties): """Normalize value and flag nans. - If 'std_norm' is True, normalize between 0 and 1. - If 'std_norm' is False, normalize between -1 and 1. + Normalize between -1 and 1. """ value_idx, missing_idx = properties['indices'] if pd.isnull(value): @@ -133,17 +69,34 @@ def normalize(tensor, value, properties, std_norm=False): column_range = properties['max'] - column_min offset = value - column_min - if std_norm: - tensor[value_idx] = offset / column_range - else: - tensor[value_idx] = 2.0 * offset / column_range - 1.0 - + tensor[value_idx] = 2.0 * offset / column_range - 1.0 tensor[missing_idx] = 0.0 - return tensor + +def denormalize(tensor, row, properties, round_value): + """Denormalize previously normalized values, setting NaN values if necessary.""" + value_idx, missing_idx = properties['indices'] + if tensor[row, 0, missing_idx] > 0.5: + return None + + normalized = tensor[row, 0, value_idx].item() + column_min = properties['min'] + column_range = properties['max'] - column_min + + denormalized = (normalized + 1) * column_range / 2.0 + column_min + + if round_value: + denormalized = round(denormalized) + + return denormalized + + +def one_hot_encode(tensor, value, properties): + """Update the index that corresponds to the value to 1.0.""" + value_index = properties['indices'][value] + tensor[value_index] = 1.0 -@staticmethod def one_hot_decode(tensor, row, properties): """Obtain the category that corresponds to the highest one-hot value.""" max_value = float('-inf') @@ -156,46 +109,63 @@ def one_hot_decode(tensor, row, properties): return selected -@staticmethod -def one_hot_encode(tensor, value, properties): - """Update the index that corresponds to the value to 1.0.""" - value_index = properties['indices'][value] - tensor[value_index] = 1.0 +def build_tensor(transform, sequences, key, dim, device): + """Convert input sequences to tensors.""" + tensors = [] + for sequence in sequences: + tensors.append(transform(sequence[key])) - return tensor + return torch.stack(tensors, dim=dim).to(device) -def transform(data, data_map): - """Transform data.""" - for properties in data_map.values(): - column_type = properties['type'] - if column_type in ('continuous', 'count'): - value_idx, missing_idx = properties['indices'] - data[:, :, value_idx] = torch.tanh(data[:, :, value_idx]) - data[:, :, missing_idx] = torch.sigmoid(data[:, :, missing_idx]) - elif column_type in ('categorical', 'ordinal'): - indices = list(properties['indices'].values()) - data[:, :, indices] = torch.nn.functional.gumbel_softmax( - data[:, :, indices], hard=True) +def value_to_tensor(tensor, value, properties): + """Update the tensor according to the value and properties.""" + column_type = properties['type'] + if column_type in ('continuous', 
+
+
+def data_to_tensor(data, model_data_size, data_map, fixed_length, max_sequence_length):
+    """Convert the input data to the corresponding tensor.
+
+    If ``self._fixed_length`` is ``False``, add a 1.0 to indicate
+    the sequence end and pad the rest of the sequence with 0.0s.
+    """
+    tensors = []
+    num_rows = len(data[0])
+    for row in range(num_rows):
+        tensor = torch.zeros(model_data_size)
+        for column, properties in data_map.items():
+            value = data[column][row]
+            value_to_tensor(tensor, value, properties)
+
+        tensors.append(tensor)
+
+    if not fixed_length:
+        tensors[-1][-1] = 1.0
+
+    for _ in range(max_sequence_length - num_rows):
+        tensors.append(torch.zeros(model_data_size))
+
+    return torch.stack(tensors, dim=0)
+
+
+def context_to_tensor(context, context_size, context_map):
+    """Convert the input context to the corresponding tensor."""
+    tensor = torch.zeros(context_size)
+    for column, properties in context_map.items():
+        value = context[column]
+        value_to_tensor(tensor, value, properties)
+
+    return tensor
+
+
-def tensor_to_data_gan(tensor, data_map):
+def tensor_to_data(tensor, data_map):
     """Rebuild a valid sequence from the given tensor."""
     sequence_length, num_sequences, _ = tensor.shape
     assert num_sequences == 1
@@ -213,20 +183,8 @@ def tensor_to_data(tensor, data_map):
             elif column_type in ('categorical', 'ordinal'):
                 value = one_hot_decode(tensor, row, properties)
             else:
-                raise ValueError() # Theoretically unreachable
+                raise ValueError()  # Theoretically unreachable
 
             column_data.append(value)
 
     return data
-
-
-def value_to_tensor(tensor, value, properties):
-    """Update the tensor according to the value and properties."""
-    column_type = properties['type']
-    if column_type in ('continuous', 'count'):
-        normalize(tensor, value, properties)
-    elif column_type in ('categorical', 'ordinal'):
-        one_hot_encode(tensor, value, properties)
-
-    else:
-        raise ValueError()  # Theoretically unreachable
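With this patch applied, the module-level helpers compose into a full encode/decode round trip. A small sketch of the intended flow (illustrative values):

```python
import torch
from deepecho.models.utils import data_to_tensor, index_map, tensor_to_data

data = [[1.0, 2.0, 3.0], ['a', 'b', 'a']]
data_map, data_size = index_map(data, ['continuous', 'categorical'])

# One extra dimension holds the end-of-sequence flag
tensor = data_to_tensor(data, data_size + 1, data_map,
                        fixed_length=False, max_sequence_length=5)
assert tensor.shape == (5, data_size + 1)

# tensor_to_data expects shape (sequence_length, 1, dimensions)
recovered = tensor_to_data(tensor.unsqueeze(1), data_map)
# recovered[0][:3] is approximately [1.0, 2.0, 3.0]
# recovered[1][:3] == ['a', 'b', 'a']; rows beyond the flag are padding noise
```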
From 1a2991e6265078e9fc2c4da79eb9573c5c4e801a Mon Sep 17 00:00:00 2001
From: CristianCuadrado
Date: Fri, 4 Sep 2020 15:27:15 +0100
Subject: [PATCH 4/8] add docstrings to utils, modify basic_gan

---
 deepecho/models/basic_gan.py | 200 ++++-------------------------------
 deepecho/models/utils.py     | 127 +++++++++++++++++++---
 2 files changed, 137 insertions(+), 190 deletions(-)

diff --git a/deepecho/models/basic_gan.py b/deepecho/models/basic_gan.py
index e494836..8a60102 100644
--- a/deepecho/models/basic_gan.py
+++ b/deepecho/models/basic_gan.py
@@ -3,11 +3,12 @@
 import logging
 
 import numpy as np
-import pandas as pd
 import torch
 from tqdm import tqdm
 
 from deepecho.models.base import DeepEcho
+from deepecho.models.utils import (
+    build_tensor, context_to_tensor, data_to_tensor, index_map, tensor_to_data)
 
 LOGGER = logging.getLogger(__name__)
@@ -24,6 +25,7 @@ class BasicGenerator(torch.nn.Module):
 
     This generator consists of an RNN layer followed by a Linear layer with the
     following schema:
+
     - The Generator takes as input a ``sequence_length`` and a ``context`` vector.
    - The ``context`` vector is expanded over the ``sequence_length`` and padded
      with ``latent_size`` random noise.
    - ``context + noise`` is passed to the RNN, which
      generates an output of shape ``(sequence_length, context_length, hidden_size)``.
    - The RNN output is passed to the Linear layer that outputs a tensor of size
      ``(sequence_length, context_length, output_size)``
+
     Args:
         context_size (int):
             Size of the contextual arrays.
@@ -78,12 +79,14 @@ class BasicDiscriminator(torch.nn.Module):
 
     This discriminator consists of an RNN layer followed by a Linear layer with the
     following schema:
+
     - The Discriminator takes as input a collection of sequences that include
       both the data and the context columns.
    - RNN takes as input a tensor with shape ``(sequence_length,
      number_of_sequences, context_size + data_size)`` and generates an output
      of shape ``(sequence_length, num_sequences, hidden_size)``.
    - The RNN output is passed to the Linear layer that outputs a single value.
+
     Args:
         context_size (int):
             Number of values in the contextual arrays.
@@ -117,6 +122,7 @@ class BasicGANModel(DeepEcho):
        - apply sigmoid to continuous/count/datetime
        - apply softmax to categorical/ordinal
     4. Define a discriminator that takes sequence + context -> score.
+
     Args:
         epochs (int):
             Number of training epochs. Defaults to 1024.
@@ -186,52 +192,6 @@ def __repr__(self):
     # Preprocessing and preparing #
     # ########################### #
 
-    @staticmethod
-    def _index_map(columns, types):
-        """Decide which dimension will store which column information in the tensor.
-
-        The output of this function has two elements:
-        - An idx_map, which is a dict that indicates the indexes at which
-          the list of tensor dimensions associated with each input column starts,
-          and the properties of such columns.
-        - An integer that indicates how many dimensions the tensor will have.
-        In order to decide this, the following process is followed for each column:
-        - If the column is numerical (continuous or count), 2 dimensions are created
-          for it. These will contain information about the value itself, as well
-          as information about whether the value should be NaN or not.
-        - If the column is categorical or ordinal, 1 dimension is created for
-          each possible value, which will later be used to hold one-hot encoding
-          information about the values.
-        """
-        dimensions = 0
-        mapping = {}
-        for column, column_type in enumerate(types):
-            values = columns[column]
-            if column_type in ('continuous', 'count'):
-                mapping[column] = {
-                    'type': column_type,
-                    'min': np.min(values),
-                    'max': np.max(values),
-                    'indices': (dimensions, dimensions + 1)
-                }
-                dimensions += 2
-
-            elif column_type in ('categorical', 'ordinal'):
-                indices = {}
-                for value in set(values):
-                    indices[value] = dimensions
-                    dimensions += 1
-
-                mapping[column] = {
-                    'type': column_type,
-                    'indices': indices
-                }
-
-            else:
-                raise ValueError('Unsupported type: {}'.format(column_type))
-
-        return mapping, dimensions
-
     def _analyze_data(self, sequences, context_types, data_types):
         """Extract information about the context and data that will be used later.
 
@@ -249,142 +209,17 @@ def _analyze_data(self, sequences, context_types, data_types):
         for column in range(len(context_types)):
             context.append([sequence['context'][column] for sequence in sequences])
 
-        self._context_map, self._context_size = self._index_map(context, context_types)
+        self._context_map, self._context_size = index_map(context, context_types)
 
         # Concatenate all the data sequences together
         data = []
         for column in range(len(data_types)):
             data.append(sum([sequence['data'][column] for sequence in sequences], []))
 
-        self._data_map, self._data_size = self._index_map(data, data_types)
+        self._data_map, self._data_size = index_map(data, data_types)
 
         self._model_data_size = self._data_size + int(not self._fixed_length)
 
-    @staticmethod
-    def _normalize(tensor, value, properties):
-        """Normalize the value between 0 and 1 and flag nans."""
-        value_idx, missing_idx = properties['indices']
-        if pd.isnull(value):
-            tensor[value_idx] = 0.0
-            tensor[missing_idx] = 1.0
-        else:
-            column_min = properties['min']
-            column_range = properties['max'] - column_min
-            offset = value - column_min
-            tensor[value_idx] = 2.0 * offset / column_range - 1.0
-            tensor[missing_idx] = 0.0
-
-    @staticmethod
-    def _denormalize(tensor, row, properties, round_value):
-        """Denormalize previously normalized values, setting NaN values if necessary."""
-        value_idx, missing_idx = properties['indices']
-        if tensor[row, 0, missing_idx] > 0.5:
-            return None
-
-        normalized = tensor[row, 0, value_idx].item()
-        column_min = properties['min']
-        column_range = properties['max'] - column_min
-
-        denormalized = (normalized + 1) * column_range / 2.0 + column_min
-        if round_value:
-            denormalized = round(denormalized)
-
-        return denormalized
-
-    @staticmethod
-    def _one_hot_encode(tensor, value, properties):
-        """Update the index that corresponds to the value to 1.0."""
-        value_index = properties['indices'][value]
-        tensor[value_index] = 1.0
-
-    @staticmethod
-    def _one_hot_decode(tensor, row, properties):
-        """Obtain the category that corresponds to the highest one-hot value."""
-        max_value = float('-inf')
-        for category, idx in properties['indices'].items():
-            value = tensor[row, 0, idx]
-            if value > max_value:
-                max_value = value
-                selected = category
-
-        return selected
-
-    def _value_to_tensor(self, tensor, value, properties):
-        """Update the tensor according to the value and properties."""
-        column_type = properties['type']
-        if column_type in ('continuous', 'count'):
-            self._normalize(tensor, value, properties)
-        elif column_type in ('categorical', 'ordinal'):
-            self._one_hot_encode(tensor, value, properties)
-
-        else:
-            raise ValueError()  # Theoretically unreachable
-
-    def _data_to_tensor(self, data):
-        """Convert the input data to the corresponding tensor.
-
-        If ``self._fixed_length`` is ``False``, add a 1.0 to indicate
-        the sequence end and pad the rest of the sequence with 0.0s.
-        """
-        tensors = []
-        num_rows = len(data[0])
-        for row in range(num_rows):
-            tensor = torch.zeros(self._model_data_size)
-            for column, properties in self._data_map.items():
-                value = data[column][row]
-                self._value_to_tensor(tensor, value, properties)
-
-            tensors.append(tensor)
-
-        if not self._fixed_length:
-            tensors[-1][-1] = 1.0
-
-        for _ in range(self._max_sequence_length - num_rows):
-            tensors.append(torch.zeros(self._model_data_size))
-
-        return torch.stack(tensors, dim=0)
-
-    def _context_to_tensor(self, context):
-        """Convert the input context to the corresponding tensor."""
-        tensor = torch.zeros(self._context_size)
-        for column, properties in self._context_map.items():
-            value = context[column]
-            self._value_to_tensor(tensor, value, properties)
-
-        return tensor
-
-    def _tensor_to_data(self, tensor):
-        """Rebuild a valid sequence from the given tensor."""
-        sequence_length, num_sequences, _ = tensor.shape
-        assert num_sequences == 1
-
-        data = [None] * len(self._data_map)
-        for column, properties in self._data_map.items():
-            column_type = properties['type']
-
-            column_data = []
-            data[column] = column_data
-            for row in range(sequence_length):
-                if column_type in ('continuous', 'count'):
-                    round_value = column_type == 'count'
-                    value = self._denormalize(tensor, row, properties, round_value=round_value)
-                elif column_type in ('categorical', 'ordinal'):
-                    value = self._one_hot_decode(tensor, row, properties)
-                else:
-                    raise ValueError()  # Theoretically unreachable
-
-                column_data.append(value)
-
-        return data
-
-    def _build_tensor(self, transform, sequences, key, dim):
-        """Convert input sequences to tensors."""
-        tensors = []
-        for sequence in sequences:
-            tensors.append(transform(sequence[key]))
-
-        return torch.stack(tensors, dim=dim).to(self._device)
-
     # ################## #
     # GAN Training steps #
     # ################## #
@@ -483,7 +318,9 @@ def fit_sequences(self, sequences, context_types, data_types):
             List of sequences. Each sequence is a single training example
             (i.e. an example of a multivariate time series with some context).
             For example, a sequence might look something like::
+
                 {
                     "context": [1],
                     "data": [
                         [1, 3, 4, 5, 11, 3, 4],
                         [2, 2, 3, 4, 5, 1, 2],
                         [1, 3, 4, 5, 2, 3, 1]
                     ]
                 }
+
             The "context" attribute maps to a list of variables which
             should be used for conditioning. These are variables which
             do not change over time.
+
             The "data" attribute contains a list of lists corresponding
             to the actual time series data such that `data[i][j]` contains
             the value at the jth time step of the ith channel of the
@@ -510,8 +348,12 @@ def fit_sequences(self, sequences, context_types, data_types):
         """
         self._analyze_data(sequences, context_types, data_types)
 
-        data = self._build_tensor(self._data_to_tensor, sequences, 'data', dim=1)
-        context = self._build_tensor(self._context_to_tensor, sequences, 'context', dim=0)
+        data = build_tensor(data_to_tensor, sequences, 'data', dim=1, device=self._device,
+                            model_data_size=self._model_data_size, data_map=self._data_map,
+                            fixed_length=self._fixed_length,
+                            max_sequence_length=self._max_sequence_length)
+        context = build_tensor(context_to_tensor, sequences, 'context', dim=0, device=self._device,
+                               context_size=self._context_size, context_map=self._context_map)
         data_context = _expand_context(data, context)
 
         discriminator, generator_opt, discriminator_opt = self._build_fit_artifacts()
@@ -547,12 +389,14 @@ def sample_sequence(self, context, sequence_length=None):
             context (list):
                 The list of values to condition on. It must match
                 the types specified in context_types when fit was called.
+
         Returns:
             list[list]:
                 A list of lists (data) corresponding to the types specified
                 in data_types when fit was called.
         """
-        context = self._context_to_tensor(context).unsqueeze(0).to(self._device)
+        context = context_to_tensor(context, self._context_size, self._context_map)\
+            .unsqueeze(0).to(self._device)
 
         with torch.no_grad():
             generated = self._generate(context, sequence_length)
@@ -562,4 +406,4 @@ def sample_sequence(self, context, sequence_length=None):
             cut_index = end_flag.cpu().numpy().argmax()
             generated = generated[:cut_index, :, :]
 
-        return self._tensor_to_data(generated)
+        return tensor_to_data(generated, self._data_map)
diff --git a/deepecho/models/utils.py b/deepecho/models/utils.py
index 8da71f9..3bbc773 100644
--- a/deepecho/models/utils.py
+++ b/deepecho/models/utils.py
@@ -56,9 +56,15 @@ def index_map(columns, types):
 
 
 def normalize(tensor, value, properties):
-    """Normalize value and flag nans.
-
-    Normalize between -1 and 1.
+    """Normalize value and flag nans. Normalized values are between -1 and 1.
+
+    Args:
+        tensor (array):
+            Vector to store normalize values and recording null values position.
+        value (float):
+            Value to normalize.
+        properties (dict):
+            Contains information related to the value category.
     """
     value_idx, missing_idx = properties['indices']
     if pd.isnull(value):
@@ -74,7 +80,22 @@ def normalize(tensor, value, properties):
 
 
 def denormalize(tensor, row, properties, round_value):
-    """Denormalize previously normalized values, setting NaN values if necessary."""
+    """Denormalize previously normalized values, setting NaN values if necessary.
+
+    Args:
+        tensor (array):
+            3D Vector that contains different samples with normalized values
+            and record of null values.
+        row (int):
+            Sample to denormalize
+        properties (dict):
+            Contains information related to the value category.
+        round_value(boolean):
+            Apply round to the denormalized value or not.
+    Return:
+        denormalized(float)
+            Return the denormalized value.
+    """
     value_idx, missing_idx = properties['indices']
     if tensor[row, 0, missing_idx] > 0.5:
         return None
@@ -117,12 +138,34 @@ def denormalize(tensor, row, properties, round_value):
 
 
 def one_hot_encode(tensor, value, properties):
-    """Update the index that corresponds to the value to 1.0."""
+    """Update the index that corresponds to the value to 1.0.
+
+    Args:
+        tensor (array):
+            Vector to store one hot encoding.
+        value (int):
+            Categorical variable key
+        properties (dict):
+            Contains information related to the value category.
+    """
     value_index = properties['indices'][value]
     tensor[value_index] = 1.0
 
 
 def one_hot_decode(tensor, row, properties):
-    """Obtain the category that corresponds to the highest one-hot value."""
+    """Obtain the category that corresponds to the highest one-hot value.
+
+    Args:
+        tensor (array):
+            Vector that store one hot encoding for different samples.
+        row (int):
+            Indicates the sample.
+        properties (dict):
+            Contains information related to the value category.
+    Returns:
+        selected(int):
+            Category selected.
+    """
     max_value = float('-inf')
     for category, idx in properties['indices'].items():
         value = tensor[row, 0, idx]
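A quick round trip through the two helpers documented above (sketch; note that ``one_hot_decode`` indexes the tensor as ``[row, 0, idx]``, so it expects a 3D tensor with a singleton middle dimension):

```python
import torch
from deepecho.models.utils import one_hot_decode, one_hot_encode

properties = {'indices': {'low': 0, 'mid': 1, 'high': 2}}

tensor = torch.zeros(3)
one_hot_encode(tensor, 'mid', properties)
print(tensor)  # tensor([0., 1., 0.])

stacked = tensor.reshape(1, 1, 3)
print(one_hot_decode(stacked, 0, properties))  # 'mid'
```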
@@ -151,17 +194,43 @@ def one_hot_decode(tensor, row, properties):
 
 
 def build_tensor(transform, sequences, key, dim, device, **transform_kwargs):
-    """Convert input sequences to tensors."""
+    """Convert input sequences to tensors.
+
+    Args:
+        transform (function):
+            Function to apply.
+        sequences (dict):
+            Contains data samples.
+        key (str):
+            Indicates with information pass to the function from variable 'sequence'.
+        dim(int)
+            Dimension to insert.
+        device(torch.device)
+            Indicate available device.
+        **transform_kwargs(dict)
+            Contains input variables for the function passed by 'transform'.
+    Returns:
+        3D torch vector, with all samples concatenated.
+    """
     tensors = []
     for sequence in sequences:
         tensors.append(transform(sequence[key], **transform_kwargs))
@@ -163,7 +223,16 @@ def build_tensor(transform, sequences, key, dim, device, **transform_kwargs):
 
 
 def value_to_tensor(tensor, value, properties):
-    """Update the tensor according to the value and properties."""
+    """Update the tensor according to the value and properties.
+
+    Args:
+        tensor (array):
+            Vector to store the values and recording null values position.
+        value (float):
+            Value to normalize.
+        properties (dict):
+            Contains information related to the value category.
+    """
     column_type = properties['type']
     if column_type in ('continuous', 'count'):
         normalize(tensor, value, properties)
@@ -175,6 +244,20 @@ def data_to_tensor(data, model_data_size, data_map, fixed_length, max_sequence_length):
     """Convert the input data to the corresponding tensor.
 
     If ``self._fixed_length`` is ``False``, add a 1.0 to indicate
     the sequence end and pad the rest of the sequence with 0.0s.
+
+    Args:
+        data (list):
+            List of arrays of input data.
+        model_data_size(int):
+            Dimension of tensors.
+        data_map (dict):
+            Contains information related to the value category.
+        fixed_length(Boolean):
+            Define samples length.
+        max_sequence_length():
+            Define the length of the biggest sequence.
+    Return:
+        2D torch vector, with all samples concatenated.
     """
     tensors = []
     num_rows = len(data[0])
@@ -203,7 +286,19 @@ def context_to_tensor(context, context_size, context_map):
 
 
 def context_to_tensor(context, context_size, context_map):
-    """Convert the input context to the corresponding tensor."""
+    """Convert the input context to the corresponding tensor.
+
+    Args:
+        context (array):
+            Context context information.
+        context_size(int):
+            Define 'tensor' size.
+        context_map (dict):
+            Contains information related to the value category.
+    Return:
+        tensor(torch tensor):
+            3D array, contains the concatenated samples
+    """
     tensor = torch.zeros(context_size)
     for column, properties in context_map.items():
         value = context[column]
@@ -215,7 +311,16 @@ def tensor_to_data(tensor, data_map):
 
 
 def tensor_to_data(tensor, data_map):
-    """Rebuild a valid sequence from the given tensor."""
+    """Rebuild a valid sequence from the given tensor.
+
+    Args:
+        tensor (list):
+            List of arrays of input data.
+        data_map(int):
+            Dimension of tensors.
+    Return:
+        data
+    """
     sequence_length, num_sequences, _ = tensor.shape
     assert num_sequences == 1
From 3dd7fa583d12ff0760f3d49a6d4ed3a7cfb64d03 Mon Sep 17 00:00:00 2001
From: CristianCuadrado
Date: Fri, 4 Sep 2020 15:32:33 +0100
Subject: [PATCH 5/8] modify utils

---
 deepecho/models/utils.py | 52 ++++++++++++++++++++--------------------
 1 file changed, 26 insertions(+), 26 deletions(-)

diff --git a/deepecho/models/utils.py b/deepecho/models/utils.py
index 3bbc773..b43e2f2 100644
--- a/deepecho/models/utils.py
+++ b/deepecho/models/utils.py
@@ -151,32 +151,6 @@ def one_hot_decode(tensor, row, properties):
     return selected
 
 
-def build_tensor(transform, sequences, key, dim, device, **transform_kwargs):
-    """Convert input sequences to tensors.
-
-    Args:
-        transform (function):
-            Function to apply.
-        sequences (dict):
-            Contains data samples.
-        key (str):
-            Indicates with information pass to the function from variable 'sequence'.
-        dim(int)
-            Dimension to insert.
-        device(torch.device)
-            Indicate available device.
-        **transform_kwargs(dict)
-            Contains input variables for the function passed by 'transform'.
-    Returns:
-        3D torch vector, with all samples concatenated.
-    """
-    tensors = []
-    for sequence in sequences:
-        tensors.append(transform(sequence[key], **transform_kwargs))
-
-    return torch.stack(tensors, dim=dim).to(device)
-
-
 def value_to_tensor(tensor, value, properties):
     """Update the tensor according to the value and properties.
 
@@ -291,3 +265,29 @@ def tensor_to_data(tensor, data_map):
         column_data.append(value)
 
     return data
+
+
+def build_tensor(transform, sequences, key, dim, device, **transform_kwargs):
+    """Convert input sequences to tensors.
+
+    Args:
+        transform (function):
+            Function to apply.
+        sequences (dict):
+            Contains data samples.
+        key (str):
+            Indicates with information pass to the function from variable 'sequence'.
+        dim(int)
+            Dimension to insert.
+        device(torch.device)
+            Indicate available device.
+        **transform_kwargs(dict)
+            Contains input variables for the function passed by 'transform'.
+    Returns:
+        3D torch vector, with all samples concatenated.
+    """
+    tensors = []
+    for sequence in sequences:
+        tensors.append(transform(sequence[key], **transform_kwargs))
+
+    return torch.stack(tensors, dim=dim).to(device)

From 1c350e38f569500c10ddc047b012f37e62a7b150 Mon Sep 17 00:00:00 2001
From: CristianCuadrado
Date: Mon, 7 Sep 2020 11:24:34 +0100
Subject: [PATCH 6/8] utils and basic_gan last improvements

---
 deepecho/models/basic_gan.py | 32 +++++++++++++++------
 deepecho/models/utils.py     | 54 ++++++++++++++++++++++++------------
 2 files changed, 59 insertions(+), 27 deletions(-)

diff --git a/deepecho/models/basic_gan.py b/deepecho/models/basic_gan.py
index 8a60102..b37b0b3 100644
--- a/deepecho/models/basic_gan.py
+++ b/deepecho/models/basic_gan.py
@@ -348,12 +348,27 @@ def fit_sequences(self, sequences, context_types, data_types):
         """
         self._analyze_data(sequences, context_types, data_types)
 
-        data = build_tensor(data_to_tensor, sequences, 'data', dim=1, device=self._device,
-                            model_data_size=self._model_data_size, data_map=self._data_map,
-                            fixed_length=self._fixed_length,
-                            max_sequence_length=self._max_sequence_length)
-        context = build_tensor(context_to_tensor, sequences, 'context', dim=0, device=self._device,
-                               context_size=self._context_size, context_map=self._context_map)
+        data = build_tensor(
+            transform=data_to_tensor,
+            sequences=sequences,
+            key='data',
+            dim=1,
+            device=self._device,
+            model_data_size=self._model_data_size,
+            data_map=self._data_map,
+            fixed_length=self._fixed_length,
+            max_sequence_length=self._max_sequence_length
+        )
+
+        context = build_tensor(
+            transform=context_to_tensor,
+            sequences=sequences,
+            key='context',
+            dim=0,
+            context_size=self._context_size,
+            context_map=self._context_map
+        ).to(self._device)
+
         data_context = _expand_context(data, context)
 
         discriminator, generator_opt, discriminator_opt = self._build_fit_artifacts()
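For orientation, the public flow that these internals serve, as a hedged end-to-end sketch (a tiny ``epochs`` value is used only to keep the illustration cheap; the argument names follow the docstrings shown in this series):

```python
from deepecho.models.basic_gan import BasicGANModel

model = BasicGANModel(epochs=16)
sequences = [
    {'context': [1], 'data': [[1, 3, 4, 5, 1], [2, 2, 3, 4, 2]]},
    {'context': [2], 'data': [[2, 4, 5, 6, 2], [3, 3, 4, 5, 3]]},
]
model.fit_sequences(sequences, context_types=['count'], data_types=['count', 'count'])

sampled = model.sample_sequence(context=[1])
# `sampled` is a list with one list of values per data channel
```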
""" - context = context_to_tensor(context, self._context_size, self._context_map)\ - .unsqueeze(0).to(self._device) - + context_tensor = context_to_tensor(context, self._context_size, self._context_map) + context = context_tensor.unsqueeze(0).to(self._device) with torch.no_grad(): generated = self._generate(context, sequence_length) if sequence_length is None: diff --git a/deepecho/models/utils.py b/deepecho/models/utils.py index b43e2f2..a5efccd 100644 --- a/deepecho/models/utils.py +++ b/deepecho/models/utils.py @@ -1,5 +1,4 @@ """Utils for models.""" -# pylint: disable-all import numpy as np import pandas as pd @@ -11,7 +10,7 @@ def index_map(columns, types): The output of this function has two elements: - - An idx_map, which is a dict that indicates the indexes at which + - An 'mapping', which is a dict that indicates the indexes at which the list of tensor dimensions associated with each input column starts, and the properties of such columns. - An integer that indicates how many dimensions the tensor will have. @@ -24,6 +23,18 @@ def index_map(columns, types): - If the column is categorical or ordinal, 1 dimentions is created for each possible value, which will be later on used to hold one-hot encoding information about the values. + + Args: + columns(list): + Data contained in the associate column. + types(list): + Contains information about 'columns' type. + + Returns: + dict: + Contains information related to the properties of the columns data. + int: + Indicates how many dimensions the tensor will have """ dimensions = 0 mapping = {} @@ -92,9 +103,10 @@ def denormalize(tensor, row, properties, round_value): Contains information related to the value category. round_value(boolean): Apply round to the denormalized value or not. - Return: - denormalized(float) - Return the denormalized value. + + Returns: + float: + Denormalized value. """ value_idx, missing_idx = properties['indices'] if tensor[row, 0, missing_idx] > 0.5: @@ -137,9 +149,10 @@ def one_hot_decode(tensor, row, properties): Indicates the sample. properties (dict): Contains information related to the value category. + Returns: - selected(int): - Category selected. + int: + Category selected. """ max_value = float('-inf') for category, idx in properties['indices'].items(): @@ -189,8 +202,10 @@ def data_to_tensor(data, model_data_size, data_map, fixed_length, max_sequence_l Define samples length. max_sequence_length(): Define the length of the biggest sequence. - Return: - 2D torch vector, with all samples concatenated. + + Returns: + torch tensor: + All samples concatenated. """ tensors = [] num_rows = len(data[0]) @@ -221,8 +236,9 @@ def context_to_tensor(context, context_size, context_map): Define 'tensor' size. context_map (dict): Contains information related to the value category. - Return: - tensor(torch tensor): + + Returns: + torch tensor: 3D array, contains the concatenated samples """ tensor = torch.zeros(context_size) @@ -241,8 +257,10 @@ def tensor_to_data(tensor, data_map): List of arrays of input data. data_map(int): Dimension of tensors. - Return: - data + + Returns: + list: + data sequence """ sequence_length, num_sequences, _ = tensor.shape assert num_sequences == 1 @@ -267,7 +285,7 @@ def tensor_to_data(tensor, data_map): return data -def build_tensor(transform, sequences, key, dim, device, **transform_kwargs): +def build_tensor(transform, sequences, key, dim, **transform_kwargs): """Convert input sequences to tensors. 
Args: @@ -279,15 +297,15 @@ def build_tensor(transform, sequences, key, dim, device, **transform_kwargs): Indicates with information pass to the function from variable 'sequence'. dim(int) Dimension to insert. - device(torch.device) - Indicate available device. **transform_kwargs(dict) Contains input variables for the function passed by 'transform'. + Returns: - 3D torch vector, with all samples concatenated. + torch tensor: + All samples concatenated. """ tensors = [] for sequence in sequences: tensors.append(transform(sequence[key], **transform_kwargs)) - return torch.stack(tensors, dim=dim).to(device) + return torch.stack(tensors, dim=dim) From a4b53a22eb18280a6dd949acb0b0f1482857316f Mon Sep 17 00:00:00 2001 From: CristianCuadrado Date: Mon, 7 Sep 2020 11:55:34 +0100 Subject: [PATCH 7/8] improve docstrings in utils, modifications in basic_gan --- deepecho/models/basic_gan.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/deepecho/models/basic_gan.py b/deepecho/models/basic_gan.py index b37b0b3..a022f9b 100644 --- a/deepecho/models/basic_gan.py +++ b/deepecho/models/basic_gan.py @@ -353,12 +353,11 @@ def fit_sequences(self, sequences, context_types, data_types): sequences=sequences, key='data', dim=1, - device=self._device, model_data_size=self._model_data_size, data_map=self._data_map, fixed_length=self._fixed_length, max_sequence_length=self._max_sequence_length - ) + ).to(self._device) context = build_tensor( transform=context_to_tensor, From 5b091e15dbce6c9766c4102e498492f7baece3fd Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Wed, 9 Sep 2020 14:31:50 +0200 Subject: [PATCH 8/8] Update docstrings --- deepecho/models/utils.py | 119 ++++++++++++++++++++------------------- 1 file changed, 62 insertions(+), 57 deletions(-) diff --git a/deepecho/models/utils.py b/deepecho/models/utils.py index a5efccd..36a6fc6 100644 --- a/deepecho/models/utils.py +++ b/deepecho/models/utils.py @@ -25,16 +25,15 @@ def index_map(columns, types): information about the values. Args: - columns(list): - Data contained in the associate column. - types(list): - Contains information about 'columns' type. + columns (list): + List of lists containing the values of each column. + types (list): + List of strings containing the type of each column. Returns: - dict: - Contains information related to the properties of the columns data. - int: - Indicates how many dimensions the tensor will have + tuple: + * ``dict``: Information related to the properties of the columns data. + * ``int``: Number of dimensions the that tensor will have. """ dimensions = 0 mapping = {} @@ -67,15 +66,18 @@ def index_map(columns, types): def normalize(tensor, value, properties): - """Normalize value and flag nans. Normalized values are between -1 and 1. + """Normalize value and flag nans. + + Normalized values are between -1 and 1. Args: tensor (array): - Vector to store normalize values and recording null values position. + Tensor in which the normalized values will be stored. value (float): Value to normalize. properties (dict): - Contains information related to the value category. + Dictionary with information related to the given value, + which must contain the indices and the min/max values. """ value_idx, missing_idx = properties['indices'] if pd.isnull(value): @@ -96,13 +98,14 @@ def denormalize(tensor, row, properties, round_value): Args: tensor (array): 3D Vector that contains different samples with normalized values - and record of null values. + and records of null values. 
row (int): - Sample to denormalize + Index of the row that needs to be decoded. properties (dict): - Contains information related to the value category. - round_value(boolean): - Apply round to the denormalized value or not. + Dictionary with information related to the given value, + which must contain the indices and the min/max values. + round_value (boolean): + Whether to round the denormalized value or not. Returns: float: @@ -125,15 +128,16 @@ def denormalize(tensor, row, properties, round_value): def one_hot_encode(tensor, value, properties): - """Update the index that corresponds to the value to 1.0. + """Set 1.0 at the tensor index that corresponds to the value. Args: tensor (array): - Vector to store one hot encoding. + Tensor that will be updated. value (int): - Categorical variable key + Value that needs to be one-hot encoded. properties (dict): - Contains information related to the value category. + Dictionary with information related to the given value, + which must contain the indices of the values. """ value_index = properties['indices'][value] tensor[value_index] = 1.0 @@ -144,15 +148,16 @@ def one_hot_decode(tensor, row, properties): Args: tensor (array): - Vector that store one hot encoding for different samples. + Tensor which contains the one-hot encoded rows. row (int): - Indicates the sample. + Index of the row that needs to be decoded. properties (dict): - Contains information related to the value category. + Dictionary with information related to the given value, + which must contain the indices of the values. Returns: int: - Category selected. + Decoded category value. """ max_value = float('-inf') for category, idx in properties['indices'].items(): @@ -169,11 +174,12 @@ def value_to_tensor(tensor, value, properties): Args: tensor (array): - Vector to store the values and recording null values position. + Tensor in which the encoded or normalized values will be stored. value (float): - Value to normalize. + Value to encode or normalize. properties (dict): - Contains information related to the value category. + Dictionary with information related to the given value, + which must contain the indices and min/max of the values. """ column_type = properties['type'] if column_type in ('continuous', 'count'): @@ -188,24 +194,24 @@ def value_to_tensor(tensor, value, properties): def data_to_tensor(data, model_data_size, data_map, fixed_length, max_sequence_length): """Convert the input data to the corresponding tensor. - If ``self._fixed_length`` is ``False``, add a 1.0 to indicate + If ``fixed_length`` is ``False``, add a 1.0 to indicate the sequence end and pad the rest of the sequence with 0.0s. Args: data (list): - List of arrays of input data. - model_data_size(int): - Dimension of tensors. + List of lists containing the input sequences. + model_data_size (int): + Number of columns to create in the tensor. data_map (dict): - Contains information related to the value category. - fixed_length(Boolean): - Define samples length. - max_sequence_length(): - Define the length of the biggest sequence. + Dictionary with information related to the data variables, + which must contain the indices and min/max of the values. + fixed_length (boolean): + Whether to add an end flag column or not. + max_sequence_length (int): + Maximum sequence length. Returns: - torch tensor: - All samples concatenated. 
+ torch.tensor """ tensors = [] num_rows = len(data[0]) @@ -230,16 +236,16 @@ def context_to_tensor(context, context_size, context_map): """Convert the input context to the corresponding tensor. Args: - context (array): - Context context information. - context_size(int): - Define 'tensor' size. + context (list): + List containing the context values. + context_size (int): + Size of the output tensor. context_map (dict): - Contains information related to the value category. + Dictionary with information related to the context variables, + which must contain the indices and min/max of the values. Returns: - torch tensor: - 3D array, contains the concatenated samples + torch.tensor """ tensor = torch.zeros(context_size) for column, properties in context_map.items(): @@ -254,13 +260,13 @@ def tensor_to_data(tensor, data_map): Args: tensor (list): - List of arrays of input data. - data_map(int): - Dimension of tensors. + Tensor containing the generated data. + data_map (int): + Dictionary with information related to the data variables, + which must contain the indices and min/max of the values. Returns: - list: - data sequence + list """ sequence_length, num_sequences, _ = tensor.shape assert num_sequences == 1 @@ -292,17 +298,16 @@ def build_tensor(transform, sequences, key, dim, **transform_kwargs): transform (function): Function to apply. sequences (dict): - Contains data samples. + Dict containing the sequences and the context vectors. key (str): - Indicates with information pass to the function from variable 'sequence'. - dim(int) - Dimension to insert. + Key to use when obtaining the data from the sequences dict. + dim (int) + Dimension to use when the tensors are stacked. **transform_kwargs(dict) - Contains input variables for the function passed by 'transform'. + Additional arguments for the ``transform`` function. Returns: - torch tensor: - All samples concatenated. + torch tensor """ tensors = [] for sequence in sequences:
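After PATCH 8/8 the helper signatures are final. One last sketch tying them together the way ``fit_sequences`` now uses them (device handling follows the pattern from PATCH 7/8; all names come from the patches above):

```python
import torch
from deepecho.models.utils import build_tensor, context_to_tensor, index_map

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

sequences = [{'context': [1]}, {'context': [2]}]
context_map, context_size = index_map([[1, 2]], ['count'])

context = build_tensor(
    transform=context_to_tensor,
    sequences=sequences,
    key='context',
    dim=0,
    context_size=context_size,
    context_map=context_map,
).to(device)
assert context.shape == (2, context_size)
```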