From ec01e1d677354c832d6738c01ef94554e43b7051 Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Thu, 30 Jan 2020 14:03:17 -0500 Subject: [PATCH 001/171] Add function to load demo data --- greenguard/demo.py | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 greenguard/demo.py diff --git a/greenguard/demo.py b/greenguard/demo.py new file mode 100644 index 0000000..369422b --- /dev/null +++ b/greenguard/demo.py @@ -0,0 +1,39 @@ +# -*- coding: utf-8 -*- + +import logging +import os + +import pandas as pd + +LOGGER = logging.getLogger(__name__) + +S3_URL = 'https://d3-ai-greenguard.s3.amazonaws.com/' +DEMO_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'demo') + + +def _load_or_download(filename, dates): + filename += '.csv.gz' + file_path = os.path.join(DEMO_PATH, filename) + if os.path.exists(file_path): + return pd.read_csv(file_path, compression='gzip', parse_dates=[dates]) + + os.makedirs(DEMO_PATH, exist_ok=True) + url = S3_URL + filename + + LOGGER.info('Downloading %s from %s', filename, url) + data = pd.read_csv(url, compression='gzip', parse_dates=[dates]) + data.to_csv(file_path, index=False, compression='gzip') + + return data + + +def load_demo(): + """Load the demo included in the GreenGuard project. + The first time that this function is executed, the data will be downloaded + and cached inside the `greenguard/demo` folder. + Subsequent calls will load the cached data instead of downloading it again. + """ + target_times = _load_or_download('target_times', 'cutoff_time') + readings = _load_or_download('readings', 'timestamp') + + return target_times, readings From bfc24cd2abde6fd9181c374ad2cac6b38b86ece1 Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Thu, 30 Jan 2020 14:04:24 -0500 Subject: [PATCH 002/171] Add functions to work with target_times --- greenguard/__init__.py | 5 +- greenguard/data.py | 225 ----------------------------------------- greenguard/targets.py | 155 ++++++++++++++++++++++++++++ 3 files changed, 157 insertions(+), 228 deletions(-) delete mode 100644 greenguard/data.py create mode 100644 greenguard/targets.py diff --git a/greenguard/__init__.py b/greenguard/__init__.py index fbc6e9a..1eab417 100644 --- a/greenguard/__init__.py +++ b/greenguard/__init__.py @@ -8,7 +8,7 @@ import os -from greenguard.data import extract_readings, make_targets +from greenguard.demo import load_demo from greenguard.pipeline import GreenGuardPipeline, get_pipelines _BASE_PATH = os.path.abspath(os.path.dirname(__file__)) @@ -18,6 +18,5 @@ __all__ = ( 'GreenGuardPipeline', 'get_pipelines', - 'extract_readings', - 'make_targets' + 'load_demo' ) diff --git a/greenguard/data.py b/greenguard/data.py deleted file mode 100644 index 21e0b7d..0000000 --- a/greenguard/data.py +++ /dev/null @@ -1,225 +0,0 @@ -"""Data module. - -This module contains functions to work directly with GreenGuard data in raw format. - -This raw format has the following characteristics: - - * All the data from all the turbines is inside a single folder. - * Inside the data folder, a folder exists for each turbine. - This folders are named exactly like each turbine id, and inside it one or more - CSV files can be found. The names of these files is not relevant. - * Each CSV file will have the the following columns: - - * timestamp: timestemp of the reading. - * signal: name or id of the signal. - * value: value of the reading. 
-""" - -import logging -import os -import warnings -from datetime import datetime - -import numpy as np -import pandas as pd - -LOGGER = logging.getLogger(__name__) - - -def make_targets(target_times, window_size, target, new_targets=None): - target_times = target_times.sort_values('cutoff_time', ascending=True) - cutoff_times = target_times.cutoff_time - window_size = pd.to_timedelta(window_size) - original_size = len(target_times) - current_size = original_size - new_targets = new_targets or current_size - - for index in range(len(cutoff_times) - 1): - timestamp = cutoff_times.iloc[index] - next_time = cutoff_times.iloc[index + 1] - - if timestamp + (window_size * 2) >= next_time: - continue - - span_start = timestamp + window_size - span_end = next_time - window_size - span_length = (span_end - span_start).total_seconds() - - delay = pd.to_timedelta(np.random.randint(span_length), unit='s') - cutoff_time = span_start + delay - - target_times = target_times.append(pd.Series({ - 'turbine_id': target_times.iloc[index].turbine_id, - 'cutoff_time': cutoff_time, - 'target': target - }), ignore_index=True) - - current_size = len(target_times) - if current_size == original_size + new_targets: - return target_times.sort_values('cutoff_time', ascending=True) - - if current_size == original_size: - warnings.warn('There is no space left between to add more targets.') - return target_times - - new_targets = new_targets - (current_size - original_size) - return make_targets(target_times, window_size, target, new_targets) - - -def _filter_by_filename(target_times, filenames): - max_csv = target_times.end.dt.strftime('%Y-%m-.csv') - min_csv = target_times.start.dt.strftime('%Y-%m-.csv') - - for filename in filenames: - if ((min_csv <= filename) & (filename <= max_csv)).any(): - yield filename - - -def _load_readings_file(turbine_file): - LOGGER.info('Loading file %s', turbine_file) - data = pd.read_csv(turbine_file) - data.columns = data.columns.str.lower() - data.rename(columns={'signal': 'signal_id'}, inplace=True) - - if 'unnamed: 0' in data.columns: - # Someone forgot to drop the index before - # storing the DataFrame as a CSV - del data['unnamed: 0'] - - LOGGER.info('Loaded %s readings from file %s', len(data), turbine_file) - - return data - - -def _filter_by_signal(data, signals): - if signals is not None: - LOGGER.info('Filtering by signal') - data = data[data.signal_id.isin(signals.signal_id)] - - LOGGER.info('Selected %s readings by signal', len(data)) - - return data - - -def _filter_by_timestamp(data, target_times): - LOGGER.info('Parsing timestamps') - timestamps = pd.to_datetime(data['timestamp'], format='%m/%d/%y %H:%M:%S') - data['timestamp'] = timestamps - - LOGGER.info('Filtering by timestamp') - - related = [False] * len(timestamps) - for row in target_times.itertuples(): - related |= (row.start <= timestamps) & (timestamps <= row.end) - - data = data[related] - - LOGGER.info('Selected %s readings by timestamp', len(data)) - - return data - - -def _load_turbine_readings(readings_path, target_times, signals): - turbine_id = target_times.turbine_id.iloc[0] - turbine_path = os.path.join(readings_path, turbine_id) - filenames = sorted(os.listdir(turbine_path)) - filenames = _filter_by_filename(target_times, filenames) - - readings = list() - for readings_file in filenames: - readings_file_path = os.path.join(turbine_path, readings_file) - data = _load_readings_file(readings_file_path) - data = _filter_by_signal(data, signals) - data = _filter_by_timestamp(data, target_times) - - 
readings.append(data) - - if readings: - readings = pd.concat(readings) - else: - readings = pd.DataFrame(columns=['timestamp', 'signal_id', 'value', 'turbine_id']) - - LOGGER.info('Loaded %s readings from turbine %s', len(readings), turbine_id) - - return readings - - -def _get_times(target_times, window_size): - cutoff_times = target_times.cutoff_time - if window_size: - window_size = pd.to_timedelta(window_size) - min_times = cutoff_times - window_size - else: - min_times = [datetime.min] * len(cutoff_times) - - return pd.DataFrame({ - 'turbine_id': target_times.turbine_id, - 'start': min_times, - 'end': cutoff_times, - }) - - -def _load_readings(readings_path, target_times, signals, window_size): - turbine_ids = target_times.turbine_id.unique() - - target_times = _get_times(target_times, window_size) - - readings = list() - for turbine_id in sorted(turbine_ids): - turbine_target_times = target_times[target_times['turbine_id'] == turbine_id] - LOGGER.info('Loading turbine %s readings', turbine_id) - turbine_readings = _load_turbine_readings(readings_path, turbine_target_times, signals) - turbine_readings['turbine_id'] = turbine_id - readings.append(turbine_readings) - - return pd.concat(readings) - - -def extract_readings(readings_path, target_times, signals=None, window_size=None): - """Extract raw readings data for the given target_times. - - The ``target_times`` table is examined to decide from which turbines found - in the ``reading_pathp`` which data to load. - - And the output is a ``pandas.DataFrame`` containing: - - * `turbine_id`: Unique identifier of the turbine which this reading comes from. - * `signal_id`: Unique identifier of the signal which this reading comes from. - * `timestamp`: Time where the reading took place, as an ISO formatted datetime. - * `value`: Numeric value of this reading. - - Args: - readings_path (str): - Path to the folder containing all the readings data. - target_times (pd.DataFrame or str): - target_times DataFrame or path to the target_times CSV file. - signals (list): - List of signals to load from the readings files. If not given, load - all the signals available. - window_size (str): - Rule indicating how long back before the cutoff times we have to go - when loading the data. - - Returns: - pandas.DataFrame - """ - if isinstance(target_times, pd.DataFrame): - target_times = target_times.copy() - else: - target_times = pd.read_csv(target_times) - - target_times['cutoff_time'] = pd.to_datetime(target_times['cutoff_time']) - - without_duplicates = target_times.drop_duplicates(subset=['cutoff_time', 'turbine_id']) - if len(target_times) != len(without_duplicates): - raise ValueError("Duplicate rows found in target_times") - - if isinstance(signals, list): - signals = pd.DataFrame({'signal_id': signals}) - elif isinstance(signals, str): - signals = pd.read_csv(signals) - - readings = _load_readings(readings_path, target_times, signals, window_size) - LOGGER.info('Loaded %s turbine readings', len(readings)) - - return readings diff --git a/greenguard/targets.py b/greenguard/targets.py new file mode 100644 index 0000000..18106b7 --- /dev/null +++ b/greenguard/targets.py @@ -0,0 +1,155 @@ +"""Targets module. + +This module contains functions to work with target_times. 
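+
+The main input to these functions is a ``target_times`` table. As a minimal
+sketch (the values below are illustrative only), such a table can be built
+with pandas like this:
+
+    import pandas as pd
+
+    target_times = pd.DataFrame([
+        {'turbine_id': 'T001', 'cutoff_time': pd.Timestamp('2013-01-12'), 'target': 0},
+        {'turbine_id': 'T001', 'cutoff_time': pd.Timestamp('2013-03-04'), 'target': 1},
+    ])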
+"""
+
+import logging
+import warnings
+
+import numpy as np
+import pandas as pd
+from tqdm.auto import trange
+
+LOGGER = logging.getLogger(__name__)
+
+
+def make_targets(target_times, window_size, target, new_targets=None):
+    target_times = target_times.sort_values('cutoff_time', ascending=True)
+    cutoff_times = target_times.cutoff_time
+    window_size = pd.to_timedelta(window_size)
+    original_size = len(target_times)
+    current_size = original_size
+    new_targets = new_targets or current_size
+
+    for index in trange(len(cutoff_times) - 1):
+        timestamp = cutoff_times.iloc[index]
+        next_time = cutoff_times.iloc[index + 1]
+
+        if timestamp + (window_size * 2) >= next_time:
+            continue
+
+        span_start = timestamp + window_size
+        span_end = next_time - window_size
+        span_length = (span_end - span_start).total_seconds()
+
+        delay = pd.to_timedelta(np.random.randint(span_length), unit='s')
+        cutoff_time = span_start + delay
+
+        target_times = target_times.append(pd.Series({
+            'turbine_id': target_times.iloc[index].turbine_id,
+            'cutoff_time': cutoff_time,
+            'target': target
+        }), ignore_index=True)
+
+        current_size = len(target_times)
+        if current_size == original_size + new_targets:
+            return target_times.sort_values('cutoff_time', ascending=True)
+
+    if current_size == original_size:
+        warnings.warn('There is no space left between targets to add more targets.')
+        return target_times
+
+    new_targets = new_targets - (current_size - original_size)
+    return make_targets(target_times, window_size, target, new_targets)
+
+
+def _to_timedelta(specification):
+    if isinstance(specification, int):
+        specification = '{}s'.format(specification)
+
+    return pd.to_timedelta(specification)
+
+
+def make_target_times(failure_dates, step, start=None, end=None, forecast_window=0,
+                      prediction_window=0, before=0, after=0, offset=0, max_true=None,
+                      max_false=None, shuffle=True):
+
+    step = _to_timedelta(step)
+    start = start or failure_dates.date.min()
+
+    forecast_window = _to_timedelta(forecast_window)
+    prediction_window = _to_timedelta(prediction_window)
+    before = _to_timedelta(before)
+    after = _to_timedelta(after)
+    offset = _to_timedelta(offset)
+
+    target_times = pd.DataFrame()
+    turbines = failure_dates.turbine_id.unique()
+    failures = failure_dates.set_index(['turbine_id', 'date'])
+
+    for turbine in turbines:
+        turbine_failures = failures.loc[turbine]
+
+        min_failure_date = turbine_failures.index.min() - before
+        last_failure_date = turbine_failures.index.max() + after
+        turbine_targets = list()
+        while min_failure_date < last_failure_date:
+            max_failure_date = min_failure_date + prediction_window
+            day_failures = turbine_failures.loc[min_failure_date:max_failure_date]
+
+            # Record the target before advancing the window, so that the
+            # cutoff_time refers to the window that was just evaluated.
+            turbine_targets.append({
+                'turbine_id': turbine,
+                'target': int(bool(len(day_failures))),
+                'cutoff_time': min_failure_date - forecast_window
+            })
+
+            # Advance by ``step``: advancing by ``offset``, which defaults to 0,
+            # would never terminate.
+            min_failure_date = min_failure_date + step
+
+        turbine_targets = pd.DataFrame(turbine_targets)
+        failed = turbine_targets[turbine_targets.target == 1]
+        if max_true is not None:
+            failed = failed.sample(min(max_true, len(failed)))
+
+        target_times = target_times.append(failed)
+
+        non_failed = turbine_targets[turbine_targets.target == 0]
+        if max_false is not None:
+            non_failed = non_failed.sample(min(max_false, len(non_failed)))
+
+        target_times = target_times.append(non_failed)
+
+    if shuffle:
+        target_times = target_times.sample(len(target_times))
+
+    return target_times
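+
+# Example usage (a sketch with assumed inputs: ``failures`` is a hypothetical
+# table with the ``turbine_id`` and ``date`` columns that this function
+# indexes on; the window arguments accept Timedelta specifications):
+#
+#     failures = pd.DataFrame({
+#         'turbine_id': ['T001', 'T001'],
+#         'date': pd.to_datetime(['2013-01-10', '2013-02-20']),
+#     })
+#     target_times = make_target_times(failures, step='1d', forecast_window='1d',
+#                                      prediction_window='2d', max_false=10)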
+def _valid_targets(timestamps):
+    def apply_function(row):
+        cutoff = row.cutoff_time
+        try:
+            times = timestamps.loc[row.turbine_id]
+        except KeyError:
+            return False
+
+        return times['min'] < cutoff < times['max']
+
+    return apply_function
+
+
+def select_valid_targets(target_times, readings, window_size):
+    """Filter out target_times without enough data for this window_size.
+
+    The target_times table is scanned and checked against the readings table
+    considering the window_size. All the target times entries that do not
+    have enough data are dropped.
+
+    Args:
+        target_times (pandas.DataFrame):
+            Target times table, with at least turbine_id and cutoff_time fields.
+        readings (pandas.DataFrame):
+            Readings table, with at least turbine_id, signal_id and timestamp fields.
+        window_size (str or pandas.Timedelta):
+            Timedelta specification that indicates the length of the training window.
+
+    Returns:
+        pandas.DataFrame:
+            New target_times table without the invalid targets.
+    """
+    timestamps = readings.groupby('turbine_id').timestamp.agg(['min', 'max'])
+    timestamps['min'] += pd.to_timedelta(window_size)
+
+    valid = target_times.apply(_valid_targets(timestamps), axis=1)
+    valid_targets = target_times[valid].copy()
+
+    LOGGER.info('Dropped %s invalid targets', len(target_times) - len(valid_targets))
+
+    return valid_targets

From e8c65f8c2b1652c22431fd455e2a6347d9b1ae8b Mon Sep 17 00:00:00 2001
From: Carles Sala
Date: Thu, 30 Jan 2020 14:06:01 -0500
Subject: [PATCH 003/171] Add docstrings and small robustness and usability improvements

---
 greenguard/loaders/__init__.py |  30 +----
 greenguard/loaders/csv.py      |  84 ++++++++++++--
 greenguard/pipeline.py         | 196 ++++++++++++++++++++++++++++++++-
 3 files changed, 271 insertions(+), 39 deletions(-)

diff --git a/greenguard/loaders/__init__.py b/greenguard/loaders/__init__.py
index a4011fb..169c687 100644
--- a/greenguard/loaders/__init__.py
+++ b/greenguard/loaders/__init__.py
@@ -1,28 +1,6 @@
-import logging
+from greenguard.loaders.csv import CSVLoader
 
-LOGGER = logging.getLogger(__name__)
-
-
-def _valid_targets(timestamps):
-    def apply_function(row):
-        cutoff = row.cutoff_time
-        try:
-            times = timestamps.loc[row.turbine_id]
-        except KeyError:
-            return False
-
-        return times['min'] < cutoff < times['max']
-
-    return apply_function
-
-
-def select_valid_targets(target_times, readings, window_size):
-    timestamps = readings.groupby('turbine_id').timestamp.agg(['min', 'max'])
-    timestamps['min'] += window_size
-
-    valid = target_times.apply(_valid_targets(timestamps), axis=1)
-    valid_targets = target_times[valid].copy()
-
-    LOGGER.info('Dropped %s invalid targets', len(target_times) - len(valid_targets))
-
-    return valid_targets
+__all__ = (
+    'CSVLoader',
+)
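To see what the relocated select_valid_targets does end to end, here is a
minimal sketch with toy data (illustrative values; it keeps only the targets
whose cutoff time leaves a full window of readings before it):

    import pandas as pd

    from greenguard.targets import select_valid_targets

    readings = pd.DataFrame({
        'turbine_id': ['T001'] * 4,
        'signal_id': ['S01'] * 4,
        'timestamp': pd.date_range('2013-01-01', periods=4, freq='12H'),
        'value': [1.0, 2.0, 3.0, 4.0],
    })
    target_times = pd.DataFrame({
        'turbine_id': ['T001', 'T001'],
        'cutoff_time': pd.to_datetime(['2013-01-02 06:00:00', '2013-01-01 06:00:00']),
        'target': [0, 1],
    })

    # Only the first target survives: it is the only one with a full day
    # of readings before its cutoff time.
    valid = select_valid_targets(target_times, readings, window_size='1d')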
diff --git a/greenguard/loaders/csv.py b/greenguard/loaders/csv.py
index b032ba0..1d4bf9c 100644
--- a/greenguard/loaders/csv.py
+++ b/greenguard/loaders/csv.py
@@ -4,16 +4,49 @@
 import dask
 import pandas as pd
 
+from greenguard.targets import select_valid_targets
+
 LOGGER = logging.getLogger(__name__)
 
 
 class CSVLoader:
-
-    def __init__(self, readings_path='.', rule=None, aggregation='mean', unstack=True):
+    """Load the required readings from CSV files.
+
+    The CSVLoader class is responsible for analyzing the target_times table
+    and then loading the required readings from CSV files.
+
+    Also, optionally, it can perform a resampling aggregation while loading
+    the data, reducing the memory requirements.
+
+    The CSVLoader class uses Dask to parallelize all the IO and resampling
+    computation and reduce loading times.
+
+    Args:
+        readings_path (str):
+            Path to the readings folder, where a folder exists for each turbine.
+        rule (str):
+            Resampling rule, as expected by ``DataFrame.resample``. The rule is a
+            string representation of a TimeDelta, which includes a number and a
+            unit. For example: ``3d``, ``1w``, ``6h``.
+            If ``None``, resampling is disabled.
+        aggregation (str):
+            Name of the aggregation to perform during the resampling.
+        unstack (bool):
+            Whether to unstack the resampled data, generating one column per signal.
+            Only used when resampling. Defaults to ``False``.
+    """
+
+    DEFAULT_DATETIME_FMT = '%Y-%m-%dT%H:%M:%S'
+    DEFAULT_FILENAME_FMT = '%Y-%m-.csv'
+
+    def __init__(self, readings_path='.', rule=None, aggregation='mean', unstack=False,
+                 datetime_fmt=DEFAULT_DATETIME_FMT, filename_fmt=DEFAULT_FILENAME_FMT):
         self._readings_path = readings_path
         self._rule = rule
         self._aggregation = aggregation
         self._unstack = unstack
+        self._datetime_fmt = datetime_fmt
+        self._filename_fmt = filename_fmt
 
     @dask.delayed
     def __filter_by_signal(self, readings, signals):
@@ -28,7 +61,7 @@ def __filter_by_signal(self, readings, signals):
     @dask.delayed
     def __filter_by_timestamp(self, readings, timestamps):
         LOGGER.debug('Parsing timestamps')
-        readings_ts = pd.to_datetime(readings['timestamp'], format='%m/%d/%y %H:%M:%S')
+        readings_ts = pd.to_datetime(readings['timestamp'], format=self._datetime_fmt)
         readings['timestamp'] = readings_ts
 
         LOGGER.debug('Filtering by timestamp')
@@ -76,10 +109,9 @@ def __consolidate(self, readings, turbine_id):
 
         return readings
 
-    @staticmethod
-    def _get_filenames(turbine_path, timestamps):
-        min_csv = timestamps.start.dt.strftime('%Y-%m-.csv')
-        max_csv = timestamps.stop.dt.strftime('%Y-%m-.csv')
+    def _get_filenames(self, turbine_path, timestamps):
+        min_csv = timestamps.start.dt.strftime(self._filename_fmt)
+        max_csv = timestamps.stop.dt.strftime(self._filename_fmt)
 
         for filename in sorted(os.listdir(turbine_path)):
             if ((min_csv <= filename) & (filename <= max_csv)).any():
@@ -138,7 +170,31 @@ def _get_timestamps(target_times, window_size):
             'stop': cutoff_times,
         })
 
-    def load(self, target_times, window_size, signals=None, debug=False):
+    def load(self, target_times, window_size, signals=None, debug=False, select_valid=True):
+        """Load the readings needed for the given target_times and window_size.
+
+        Optionally filter the signals that are loaded and discard the rest.
+
+        Args:
+            target_times (str or pandas.DataFrame):
+                target_times ``DataFrame`` or path to the corresponding CSV file.
+                The table must have three columns, ``turbine_id``, ``target`` and
+                ``cutoff_time``.
+            window_size (str):
+                Amount of data to load before each cutoff time, specified as a string
+                representation of a TimeDelta, which includes a number and a
+                unit. For example: ``3d``, ``1w``, ``6h``.
+            signals (list or pandas.DataFrame):
+                List of signal names or table that has a ``signal_id`` column to
+                use as the signal names list.
+            debug (bool):
+                Force single thread execution for easy debugging. Defaults to ``False``.
+            select_valid (bool):
+                Whether to drop the target_times that do not have enough readings
+                within the given window_size. Defaults to ``True``.
+
+        Returns:
+            pandas.DataFrame:
+                Table of readings for the target times, including the columns ``turbine_id``,
+                ``signal_id``, ``timestamp`` and ``value``. If ``select_valid`` is ``True``,
+                a tuple with the filtered ``target_times`` and the readings is returned
+                instead.
+        """
         if isinstance(target_times, str):
             target_times = pd.read_csv(target_times)
             target_times['cutoff_time'] = pd.to_datetime(target_times['cutoff_time'])
 
         dask_scheduler = 'single-threaded' if debug else None
         computed = dask.compute(*readings, scheduler=dask_scheduler)
-        readings = pd.concat((c for c in computed if len(c)), ignore_index=True, sort=False)
+
+        found_readings = [c for c in computed if len(c)]
+        if not found_readings:
+            msg = 'No readings found for the given target times in {}'.format(self._readings_path)
+            raise ValueError(msg)
+
+        readings = pd.concat(found_readings, ignore_index=True, sort=False)
         LOGGER.info('Loaded %s turbine readings', len(readings))
 
+        if select_valid:
+            target_times = select_valid_targets(target_times, readings, window_size)
+            return target_times, readings
+
         return readings
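With these additions, loading resampled readings becomes a short snippet; a
usage sketch (the paths and folder layout are hypothetical):

    from greenguard.loaders import CSVLoader

    loader = CSVLoader('path/to/readings', rule='600s', aggregation='mean')
    target_times, readings = loader.load('path/to/target_times.csv', window_size='1d')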
diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py
index b4db17f..509f766 100644
--- a/greenguard/pipeline.py
+++ b/greenguard/pipeline.py
@@ -22,18 +22,103 @@
 PIPELINES_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), 'pipelines'))
 
 
-def get_pipelines():
+def get_pipelines(pattern='', path=False):
+    """Get the list of available pipelines.
+
+    Optionally filter the names using a pattern or obtain
+    the paths to the pipelines alongside their names.
+
+    Args:
+        pattern (str):
+            Pattern to search for in the pipeline names.
+        path (bool):
+            Whether to return a dictionary containing the pipeline
+            paths instead of only a list with the names.
+
+    Returns:
+        list or dict:
+            List of available and matching pipeline names.
+            If `path=True`, return a dict containing the pipeline
+            names as keys and their absolute paths as values.
+    """
     pipelines = dict()
     for filename in os.listdir(PIPELINES_DIR):
-        if filename.endswith('.json'):
+        if filename.endswith('.json') and pattern in filename:
             name = os.path.basename(filename)[:-len('.json')]
-            path = os.path.join(PIPELINES_DIR, filename)
-            pipelines[name] = path
+            pipeline_path = os.path.join(PIPELINES_DIR, filename)
+            pipelines[name] = pipeline_path
+
+    if not path:
+        pipelines = list(pipelines)
 
     return pipelines
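+
+# Example (illustrative; the actual names depend on the JSON files present
+# in the pipelines folder):
+#
+#     get_pipelines('dfs')
+#     # ['resample_600s_unstack_dfs_1d_xgb_classifier', ...]
+#
+#     get_pipelines('dfs', path=True)
+#     # {'resample_600s_unstack_dfs_1d_xgb_classifier': '/path/to/it.json', ...}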
 
 
 class GreenGuardPipeline(object):
+    """Main Machine Learning component in the GreenGuard project.
+
+    The ``GreenGuardPipeline`` represents the abstraction of a Machine
+    Learning pipeline architecture specialized on the GreenGuard data
+    format.
+
+    In order to use it, an MLBlocks pipeline template needs to be given,
+    alongside information about how to evaluate its performance using
+    cross validation.
+
+    Attributes:
+        template (MLPipeline):
+            MLPipeline instance used as the template for tuning.
+        template_name (str):
+            Name of the template being used.
+        fitted (bool):
+            Whether this GreenGuardPipeline has already been fitted or not.
+        steps (list):
+            List of primitives that compose this template.
+        preprocessing (list):
+            List of preprocessing steps. These steps have no learning stage
+            and are executed only once on the complete training dataset, before
+            partitioning it for cross validation.
+        static (list):
+            List of static steps. These are all the steps in the pipeline that
+            come after the preprocessing ones but have no hyperparameters.
+            These are executed on each cross validation split only once, when
+            the data is partitioned, and their output is cached to be reused
+            later on at every tuning iteration.
+        tunable (list):
+            List of steps that have hyperparameters and will be tuned during
+            the tuning loop.
+
+    Args:
+        template (str or MLPipeline):
+            Template to use. If a ``str`` is given, load the corresponding
+            ``MLPipeline``.
+        metric (str or function):
+            Metric to use. If a ``str`` is given, it must be one of the metrics
+            defined in the ``greenguard.metrics.METRICS`` dictionary.
+        cost (bool):
+            Whether the metric is a cost function (the lower the better) or not.
+            Defaults to ``False``.
+        init_params (dict):
+            Initial parameters to pass to the underlying MLPipeline if something
+            other than the defaults needs to be used.
+            Defaults to ``None``.
+        stratify (bool):
+            Whether to stratify the data when partitioning for cross validation.
+            Defaults to ``True``.
+        cv_splits (int):
+            Number of cross validation folds to use. Defaults to ``5``.
+        shuffle (bool):
+            Whether to shuffle the data when partitioning for cross validation.
+            Defaults to ``True``.
+        random_state (int or RandomState):
+            Random state to use for the cross validation partitioning.
+            Defaults to ``0``.
+        preprocessing (int):
+            Number of steps to execute during the preprocessing stage.
+            The number of preprocessing steps cannot be higher than the
+            number of static steps in the given template.
+            Defaults to ``0``.
+    """
 
     template = None
     template_name = None
@@ -82,6 +167,12 @@ def _update_params(old, new):
                     block_params[param] = value
 
     def set_init_params(self, init_params):
+        """Set new init params for the template and pipeline.
+
+        Args:
+            init_params (dict):
+                New init_params to use.
+        """
         template_params = self.template['init_params']
         self._update_params(template_params, init_params)
         self._build_pipeline()
@@ -140,9 +231,23 @@ def __repr__(self):
         )
 
     def get_hyperparameters(self):
+        """Get the current hyperparameters.
+
+        Returns:
+            dict:
+                Current hyperparameters.
+        """
         return deepcopy(self._hyperparameters)
 
     def set_hyperparameters(self, hyperparameters):
+        """Set new hyperparameters for this pipeline instance.
+
+        The template ``init_params`` remain unmodified.
+
+        Args:
+            hyperparameters (dict):
+                New hyperparameters to use.
+        """
         self._update_params(self._hyperparameters, hyperparameters)
         self._build_pipeline()
 
@@ -185,6 +290,35 @@ def _generate_splits(self, X, y, readings):
         return splits
 
     def cross_validate(self, X=None, y=None, readings=None, params=None):
+        """Compute the cross validation score using the given data.
+
+        If the splits have not been previously computed, compute them now.
+        During this computation, the data is partitioned using the indicated
+        cross validation parameters and later on processed using the
+        pipeline static steps.
+
+        The results of the fit and produce executions are cached and reused
+        in subsequent calls to this method.
+
+        Args:
+            X (pandas.DataFrame):
+                ``target_times`` data, without the ``target`` column.
+                Only needed if the splits have not been previously computed.
+            y (pandas.Series or numpy.ndarray):
+                ``target`` vector corresponding to the passed ``target_times``.
+                Only needed if the splits have not been previously computed.
+            readings (pandas.DataFrame):
+                ``readings`` table. Only needed if the splits have not been
+                previously computed.
+            params (dict):
+                Hyperparameter values to use.
+
+        Returns:
+            float:
+                Computed cross validation score. This score is the average
+                of the scores obtained across all the cross validation folds.
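+
+        Example (a sketch; assumes ``target_times`` and ``readings`` have
+        already been loaded, for instance with ``greenguard.demo.load_demo``,
+        and that ``pipeline`` is a ``GreenGuardPipeline`` instance):
+
+            X = target_times[['turbine_id', 'cutoff_time']]
+            y = target_times['target']
+            score = pipeline.cross_validate(X, y, readings)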
+ """ + if self._splits is None: LOGGER.info('Running static steps before cross validation') self._splits = self._generate_splits(X, y, readings) @@ -272,6 +406,21 @@ def _get_tuner(self): return tuner def tune(self, X=None, y=None, readings=None, iterations=10): + """Tune this pipeline for the indicated number of iterations. + + Args: + X (pandas.DataFrame): + ``target_times`` data, without the ``target`` column. + Only needed if the splits have not been previously computed. + y (pandas.Series or numpy.ndarray): + ``target`` vector corresponding to the passed ``target_times``. + Only needed if the splits have not been previously computed. + readings (pandas.DataFrame): + ``readings`` table. Only needed if the splits have not been + previously computed. + iterations (int): + Number of iterations to perform. + """ if not self._tuner: LOGGER.info('Scoring the default pipeline') self.cv_score = self.cross_validate(X, y, readings) @@ -303,20 +452,59 @@ def tune(self, X=None, y=None, readings=None, iterations=10): i + 1, failed) def fit(self, X, y, readings): + """Fit this pipeline to the given data. + + Args: + X (pandas.DataFrame): + ``target_times`` data, without the ``target`` column. + y (pandas.Series or numpy.ndarray): + ``target`` vector corresponding to the passed ``target_times``. + readings (pandas.DataFrame): + ``readings`` table. + """ self._pipeline.fit(X, y, readings=readings) self.fitted = True def predict(self, X, readings): + """Make predictions using this pipeline. + + Args: + X (pandas.DataFrame): + ``target_times`` data, containing the ``turbine_id`` and + the ``cutoff_time`` column. + readings (pandas.DataFrame): + ``readings`` table. + + Returns: + numpy.ndarray: + Vector of predictions. + """ if not self.fitted: raise NotFittedError() return self._pipeline.predict(X, readings=readings) def save(self, path): + """Serialize and save this pipeline using cloudpickle. + + Args: + path (str): + Path to the file where the pipeline will be saved. + """ with open(path, 'wb') as pickle_file: cloudpickle.dump(self, pickle_file) @classmethod def load(cls, path): + """Load a previously saved pipeline from a file. + + Args: + path (str): + Path to the file where the pipeline is saved. + + Returns: + GreenGuardPipeline: + Loaded GreenGuardPipeline instance. 
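+
+        Example (illustrative round trip through the filesystem):
+
+            pipeline.save('my_pipeline.pkl')
+            pipeline = GreenGuardPipeline.load('my_pipeline.pkl')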
+ """ with open(path, 'rb') as pickle_file: return cloudpickle.load(pickle_file) From 1726d77b4bb160ea22d9da3fc54d9383dd5adffd Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Thu, 30 Jan 2020 14:06:39 -0500 Subject: [PATCH 004/171] Upgrade MLPrimitives and improve pipelines --- .../{ => disabled}/dfs_xgb_classifier.json | 0 .../normalize_dfs_xgb_classifier.json | 0 .../resample_dfs_xgb_classifier.json | 0 ...resample_normalize_dfs_xgb_classifier.json | 4 +- .../resample_unstack_dfs_xgb_classifier.json | 0 ...ack_double_lstm_timeseries_classifier.json | 0 ...le_unstack_lstm_timeseries_classifier.json | 8 +- ..._unstack_normalize_dfs_xgb_classifier.json | 0 ...unstack_24_lstm_timeseries_classifier.json | 119 ++++++++++++++++++ ..._double_24_lstm_timeseries_classifier.json | 119 ++++++++++++++++++ ..._600s_normalize_dfs_1d_xgb_classifier.json | 65 ++++++++++ ...nstack_144_lstm_timeseries_classifier.json | 119 ++++++++++++++++++ ...le_600s_unstack_dfs_1d_xgb_classifier.json | 78 ++++++++++++ ...double_144_lstm_timeseries_classifier.json | 119 ++++++++++++++++++ ...stack_normalize_dfs_1d_xgb_classifier.json | 69 ++++++++++ .../unstacked_dfs_xgb_classifier.json | 0 ...ked_double_lstm_timeseries_classifier.json | 0 .../unstacked_lstm_timeseries_classifier.json | 0 ...nstacked_normalize_dfs_xgb_classifier.json | 0 setup.py | 7 +- 20 files changed, 697 insertions(+), 10 deletions(-) rename greenguard/pipelines/{ => disabled}/dfs_xgb_classifier.json (100%) rename greenguard/pipelines/{ => disabled}/normalize_dfs_xgb_classifier.json (100%) rename greenguard/pipelines/{ => disabled}/resample_dfs_xgb_classifier.json (100%) rename greenguard/pipelines/{ => disabled}/resample_normalize_dfs_xgb_classifier.json (96%) rename greenguard/pipelines/{ => disabled}/resample_unstack_dfs_xgb_classifier.json (100%) rename greenguard/pipelines/{ => disabled}/resample_unstack_double_lstm_timeseries_classifier.json (100%) rename greenguard/pipelines/{ => disabled}/resample_unstack_lstm_timeseries_classifier.json (97%) rename greenguard/pipelines/{ => disabled}/resample_unstack_normalize_dfs_xgb_classifier.json (100%) create mode 100644 greenguard/pipelines/resample_3600s_unstack_24_lstm_timeseries_classifier.json create mode 100644 greenguard/pipelines/resample_3600s_unstack_double_24_lstm_timeseries_classifier.json create mode 100644 greenguard/pipelines/resample_600s_normalize_dfs_1d_xgb_classifier.json create mode 100644 greenguard/pipelines/resample_600s_unstack_144_lstm_timeseries_classifier.json create mode 100644 greenguard/pipelines/resample_600s_unstack_dfs_1d_xgb_classifier.json create mode 100644 greenguard/pipelines/resample_600s_unstack_double_144_lstm_timeseries_classifier.json create mode 100644 greenguard/pipelines/resample_600s_unstack_normalize_dfs_1d_xgb_classifier.json rename greenguard/pipelines/{ => unstacked}/unstacked_dfs_xgb_classifier.json (100%) rename greenguard/pipelines/{ => unstacked}/unstacked_double_lstm_timeseries_classifier.json (100%) rename greenguard/pipelines/{ => unstacked}/unstacked_lstm_timeseries_classifier.json (100%) rename greenguard/pipelines/{ => unstacked}/unstacked_normalize_dfs_xgb_classifier.json (100%) diff --git a/greenguard/pipelines/dfs_xgb_classifier.json b/greenguard/pipelines/disabled/dfs_xgb_classifier.json similarity index 100% rename from greenguard/pipelines/dfs_xgb_classifier.json rename to greenguard/pipelines/disabled/dfs_xgb_classifier.json diff --git a/greenguard/pipelines/normalize_dfs_xgb_classifier.json 
b/greenguard/pipelines/disabled/normalize_dfs_xgb_classifier.json similarity index 100% rename from greenguard/pipelines/normalize_dfs_xgb_classifier.json rename to greenguard/pipelines/disabled/normalize_dfs_xgb_classifier.json diff --git a/greenguard/pipelines/resample_dfs_xgb_classifier.json b/greenguard/pipelines/disabled/resample_dfs_xgb_classifier.json similarity index 100% rename from greenguard/pipelines/resample_dfs_xgb_classifier.json rename to greenguard/pipelines/disabled/resample_dfs_xgb_classifier.json diff --git a/greenguard/pipelines/resample_normalize_dfs_xgb_classifier.json b/greenguard/pipelines/disabled/resample_normalize_dfs_xgb_classifier.json similarity index 96% rename from greenguard/pipelines/resample_normalize_dfs_xgb_classifier.json rename to greenguard/pipelines/disabled/resample_normalize_dfs_xgb_classifier.json index bf32034..3d7d4d2 100644 --- a/greenguard/pipelines/resample_normalize_dfs_xgb_classifier.json +++ b/greenguard/pipelines/disabled/resample_normalize_dfs_xgb_classifier.json @@ -10,7 +10,7 @@ ], "init_params": { "pandas.DataFrame.resample#1": { - "rule": "1h", + "rule": "600s", "on": "timestamp", "groupby": [ "turbine_id", @@ -46,7 +46,7 @@ "copy": true, "verbose": true, "n_jobs": 1, - "training_window": "3d" + "training_window": "1d" } }, "input_names": { diff --git a/greenguard/pipelines/resample_unstack_dfs_xgb_classifier.json b/greenguard/pipelines/disabled/resample_unstack_dfs_xgb_classifier.json similarity index 100% rename from greenguard/pipelines/resample_unstack_dfs_xgb_classifier.json rename to greenguard/pipelines/disabled/resample_unstack_dfs_xgb_classifier.json diff --git a/greenguard/pipelines/resample_unstack_double_lstm_timeseries_classifier.json b/greenguard/pipelines/disabled/resample_unstack_double_lstm_timeseries_classifier.json similarity index 100% rename from greenguard/pipelines/resample_unstack_double_lstm_timeseries_classifier.json rename to greenguard/pipelines/disabled/resample_unstack_double_lstm_timeseries_classifier.json diff --git a/greenguard/pipelines/resample_unstack_lstm_timeseries_classifier.json b/greenguard/pipelines/disabled/resample_unstack_lstm_timeseries_classifier.json similarity index 97% rename from greenguard/pipelines/resample_unstack_lstm_timeseries_classifier.json rename to greenguard/pipelines/disabled/resample_unstack_lstm_timeseries_classifier.json index de2d1ce..e33e83b 100644 --- a/greenguard/pipelines/resample_unstack_lstm_timeseries_classifier.json +++ b/greenguard/pipelines/disabled/resample_unstack_lstm_timeseries_classifier.json @@ -14,7 +14,7 @@ ], "init_params": { "pandas.DataFrame.resample#1": { - "rule": "3600s", + "rule": "600s", "on": "timestamp", "groupby": [ "turbine_id", @@ -50,7 +50,7 @@ "key": "timestamp" }, "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { - "window_size": 72, + "window_size": 144, "cutoff_time": "cutoff_time", "time_index": "timestamp" }, @@ -58,8 +58,8 @@ "epochs": 35, "verbose": true, "input_shape": [ - 72, - 97 + 144, + 26 ] } }, diff --git a/greenguard/pipelines/resample_unstack_normalize_dfs_xgb_classifier.json b/greenguard/pipelines/disabled/resample_unstack_normalize_dfs_xgb_classifier.json similarity index 100% rename from greenguard/pipelines/resample_unstack_normalize_dfs_xgb_classifier.json rename to greenguard/pipelines/disabled/resample_unstack_normalize_dfs_xgb_classifier.json diff --git a/greenguard/pipelines/resample_3600s_unstack_24_lstm_timeseries_classifier.json 
b/greenguard/pipelines/resample_3600s_unstack_24_lstm_timeseries_classifier.json new file mode 100644 index 0000000..7e494d5 --- /dev/null +++ b/greenguard/pipelines/resample_3600s_unstack_24_lstm_timeseries_classifier.json @@ -0,0 +1,119 @@ +{ + "primitives": [ + "pandas.DataFrame.resample", + "pandas.DataFrame.unstack", + "pandas.DataFrame.pop", + "pandas.DataFrame.pop", + "sklearn.impute.SimpleImputer", + "sklearn.preprocessing.MinMaxScaler", + "pandas.DataFrame", + "pandas.DataFrame.set", + "pandas.DataFrame.set", + "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences", + "keras.Sequential.LSTMTimeSeriesClassifier" + ], + "init_params": { + "pandas.DataFrame.resample#1": { + "rule": "3600s", + "on": "timestamp", + "groupby": [ + "turbine_id", + "signal_id" + ], + "aggregation": "mean", + "reset_index": false + }, + "pandas.DataFrame.unstack#1": { + "level": "signal_id", + "reset_index": true + }, + "pandas.DataFrame.pop#1": { + "item": "turbine_id" + }, + "pandas.DataFrame.pop#2": { + "item": "timestamp" + }, + "sklearn.preprocessing.MinMaxScaler#1": { + "feature_range": [ + -1, + 1 + ] + }, + "pandas.DataFrame#1": { + "index": null, + "columns": null + }, + "pandas.DataFrame.set#1": { + "key": "turbine_id" + }, + "pandas.DataFrame.set#2": { + "key": "timestamp" + }, + "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "window_size": 24, + "cutoff_time": "cutoff_time", + "time_index": "timestamp" + }, + "keras.Sequential.LSTMTimeSeriesClassifier": { + "epochs": 35, + "verbose": false + } + }, + "input_names": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + }, + "pandas.DataFrame.pop#1": { + "X": "readings" + }, + "pandas.DataFrame.pop#2": { + "X": "readings" + }, + "sklearn.impute.SimpleImputer#1": { + "X": "readings" + }, + "sklearn.preprocessing.MinMaxScaler#1": { + "X": "readings" + }, + "pandas.DataFrame#1": { + "X": "readings" + }, + "pandas.DataFrame.set#1": { + "X": "readings", + "value": "turbine_id" + }, + "pandas.DataFrame.set#2": { + "X": "readings", + "value": "timestamp" + }, + "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "timeseries": "readings" + } + }, + "output_names": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + }, + "pandas.DataFrame.pop#1": { + "item": "turbine_id" + }, + "pandas.DataFrame.pop#2": { + "item": "timestamp" + }, + "sklearn.impute.SimpleImputer#1": { + "X": "readings" + }, + "sklearn.preprocessing.MinMaxScaler#1": { + "X": "readings" + }, + "pandas.DataFrame#1": { + "X": "readings" + } + } +} diff --git a/greenguard/pipelines/resample_3600s_unstack_double_24_lstm_timeseries_classifier.json b/greenguard/pipelines/resample_3600s_unstack_double_24_lstm_timeseries_classifier.json new file mode 100644 index 0000000..7f4e8a6 --- /dev/null +++ b/greenguard/pipelines/resample_3600s_unstack_double_24_lstm_timeseries_classifier.json @@ -0,0 +1,119 @@ +{ + "primitives": [ + "pandas.DataFrame.resample", + "pandas.DataFrame.unstack", + "pandas.DataFrame.pop", + "pandas.DataFrame.pop", + "sklearn.impute.SimpleImputer", + "sklearn.preprocessing.MinMaxScaler", + "pandas.DataFrame", + "pandas.DataFrame.set", + "pandas.DataFrame.set", + "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences", + "keras.Sequential.DoubleLSTMTimeSeriesClassifier" + ], + "init_params": { + "pandas.DataFrame.resample#1": { + "rule": "3600s", + "on": "timestamp", + 
"groupby": [ + "turbine_id", + "signal_id" + ], + "aggregation": "mean", + "reset_index": false + }, + "pandas.DataFrame.unstack#1": { + "level": "signal_id", + "reset_index": true + }, + "pandas.DataFrame.pop#1": { + "item": "turbine_id" + }, + "pandas.DataFrame.pop#2": { + "item": "timestamp" + }, + "sklearn.preprocessing.MinMaxScaler#1": { + "feature_range": [ + -1, + 1 + ] + }, + "pandas.DataFrame#1": { + "index": null, + "columns": null + }, + "pandas.DataFrame.set#1": { + "key": "turbine_id" + }, + "pandas.DataFrame.set#2": { + "key": "timestamp" + }, + "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "window_size": 24, + "cutoff_time": "cutoff_time", + "time_index": "timestamp" + }, + "keras.Sequential.DoubleLSTMTimeSeriesClassifier": { + "epochs": 35, + "verbose": false + } + }, + "input_names": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + }, + "pandas.DataFrame.pop#1": { + "X": "readings" + }, + "pandas.DataFrame.pop#2": { + "X": "readings" + }, + "sklearn.impute.SimpleImputer#1": { + "X": "readings" + }, + "sklearn.preprocessing.MinMaxScaler#1": { + "X": "readings" + }, + "pandas.DataFrame#1": { + "X": "readings" + }, + "pandas.DataFrame.set#1": { + "X": "readings", + "value": "turbine_id" + }, + "pandas.DataFrame.set#2": { + "X": "readings", + "value": "timestamp" + }, + "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "timeseries": "readings" + } + }, + "output_names": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + }, + "pandas.DataFrame.pop#1": { + "item": "turbine_id" + }, + "pandas.DataFrame.pop#2": { + "item": "timestamp" + }, + "sklearn.impute.SimpleImputer#1": { + "X": "readings" + }, + "sklearn.preprocessing.MinMaxScaler#1": { + "X": "readings" + }, + "pandas.DataFrame#1": { + "X": "readings" + } + } +} diff --git a/greenguard/pipelines/resample_600s_normalize_dfs_1d_xgb_classifier.json b/greenguard/pipelines/resample_600s_normalize_dfs_1d_xgb_classifier.json new file mode 100644 index 0000000..3d7d4d2 --- /dev/null +++ b/greenguard/pipelines/resample_600s_normalize_dfs_1d_xgb_classifier.json @@ -0,0 +1,65 @@ +{ + "primitives": [ + "pandas.DataFrame.resample", + "featuretools.EntitySet.entity_from_dataframe", + "featuretools.EntitySet.normalize_entity", + "featuretools.EntitySet.normalize_entity", + "featuretools.dfs", + "mlprimitives.custom.feature_extraction.CategoricalEncoder", + "xgboost.XGBClassifier" + ], + "init_params": { + "pandas.DataFrame.resample#1": { + "rule": "600s", + "on": "timestamp", + "groupby": [ + "turbine_id", + "signal_id" + ], + "aggregation": "mean", + "reset_index": true + }, + "featuretools.EntitySet.entity_from_dataframe#1": { + "entity_id": "readings", + "index": "reading_id", + "make_index": true, + "time_index": "timestamp" + }, + "featuretools.EntitySet.normalize_entity#1": { + "base_entity_id": "readings", + "new_entity_id": "turbines", + "index": "turbine_id", + "make_time_index": false + }, + "featuretools.EntitySet.normalize_entity#2": { + "base_entity_id": "readings", + "new_entity_id": "signals", + "index": "signal_id", + "make_time_index": false + }, + "featuretools.dfs#1": { + "target_entity": "turbines", + "index": "turbine_id", + "time_index": "cutoff_time", + "encode": false, + "max_depth": -1, + "copy": true, + "verbose": true, + "n_jobs": 1, + "training_window": "1d" + } + }, + "input_names": { + "pandas.DataFrame.resample#1": { + "X": 
"readings" + }, + "featuretools.EntitySet.entity_from_dataframe#1": { + "dataframe": "readings" + } + }, + "output_names": { + "pandas.DataFrame.resample#1": { + "X": "readings" + } + } +} diff --git a/greenguard/pipelines/resample_600s_unstack_144_lstm_timeseries_classifier.json b/greenguard/pipelines/resample_600s_unstack_144_lstm_timeseries_classifier.json new file mode 100644 index 0000000..b54702b --- /dev/null +++ b/greenguard/pipelines/resample_600s_unstack_144_lstm_timeseries_classifier.json @@ -0,0 +1,119 @@ +{ + "primitives": [ + "pandas.DataFrame.resample", + "pandas.DataFrame.unstack", + "pandas.DataFrame.pop", + "pandas.DataFrame.pop", + "sklearn.impute.SimpleImputer", + "sklearn.preprocessing.MinMaxScaler", + "pandas.DataFrame", + "pandas.DataFrame.set", + "pandas.DataFrame.set", + "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences", + "keras.Sequential.LSTMTimeSeriesClassifier" + ], + "init_params": { + "pandas.DataFrame.resample#1": { + "rule": "600s", + "on": "timestamp", + "groupby": [ + "turbine_id", + "signal_id" + ], + "aggregation": "mean", + "reset_index": false + }, + "pandas.DataFrame.unstack#1": { + "level": "signal_id", + "reset_index": true + }, + "pandas.DataFrame.pop#1": { + "item": "turbine_id" + }, + "pandas.DataFrame.pop#2": { + "item": "timestamp" + }, + "sklearn.preprocessing.MinMaxScaler#1": { + "feature_range": [ + -1, + 1 + ] + }, + "pandas.DataFrame#1": { + "index": null, + "columns": null + }, + "pandas.DataFrame.set#1": { + "key": "turbine_id" + }, + "pandas.DataFrame.set#2": { + "key": "timestamp" + }, + "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "window_size": 144, + "cutoff_time": "cutoff_time", + "time_index": "timestamp" + }, + "keras.Sequential.LSTMTimeSeriesClassifier": { + "epochs": 35, + "verbose": false + } + }, + "input_names": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + }, + "pandas.DataFrame.pop#1": { + "X": "readings" + }, + "pandas.DataFrame.pop#2": { + "X": "readings" + }, + "sklearn.impute.SimpleImputer#1": { + "X": "readings" + }, + "sklearn.preprocessing.MinMaxScaler#1": { + "X": "readings" + }, + "pandas.DataFrame#1": { + "X": "readings" + }, + "pandas.DataFrame.set#1": { + "X": "readings", + "value": "turbine_id" + }, + "pandas.DataFrame.set#2": { + "X": "readings", + "value": "timestamp" + }, + "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "timeseries": "readings" + } + }, + "output_names": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + }, + "pandas.DataFrame.pop#1": { + "item": "turbine_id" + }, + "pandas.DataFrame.pop#2": { + "item": "timestamp" + }, + "sklearn.impute.SimpleImputer#1": { + "X": "readings" + }, + "sklearn.preprocessing.MinMaxScaler#1": { + "X": "readings" + }, + "pandas.DataFrame#1": { + "X": "readings" + } + } +} diff --git a/greenguard/pipelines/resample_600s_unstack_dfs_1d_xgb_classifier.json b/greenguard/pipelines/resample_600s_unstack_dfs_1d_xgb_classifier.json new file mode 100644 index 0000000..60be686 --- /dev/null +++ b/greenguard/pipelines/resample_600s_unstack_dfs_1d_xgb_classifier.json @@ -0,0 +1,78 @@ +{ + "primitives": [ + "pandas.DataFrame.resample", + "pandas.DataFrame.unstack", + "featuretools.EntitySet.entity_from_dataframe", + "featuretools.EntitySet.entity_from_dataframe", + "featuretools.EntitySet.add_relationship", + "featuretools.dfs", + 
"mlprimitives.custom.feature_extraction.CategoricalEncoder", + "xgboost.XGBClassifier" + ], + "init_params": { + "pandas.DataFrame.resample#1": { + "rule": "600s", + "on": "timestamp", + "groupby": [ + "turbine_id", + "signal_id" + ], + "aggregation": "mean", + "reset_index": false + }, + "pandas.DataFrame.unstack#1": { + "level": "signal_id", + "reset_index": true + }, + "featuretools.EntitySet.entity_from_dataframe#1": { + "entity_id": "readings", + "index": "reading_id", + "make_index": true, + "time_index": "timestamp" + }, + "featuretools.EntitySet.entity_from_dataframe#2": { + "entity_id": "turbines", + "index": "turbine_id", + "make_index": false + }, + "featuretools.EntitySet.add_relationship#1": { + "parent": "turbines", + "parent_column": "turbine_id", + "child": "readings", + "child_column": "turbine_id" + }, + "featuretools.dfs#1": { + "target_entity": "turbines", + "index": "turbine_id", + "time_index": "cutoff_time", + "encode": false, + "max_depth": -1, + "copy": true, + "verbose": true, + "n_jobs": 1, + "training_window": "1d" + } + }, + "input_names": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + }, + "featuretools.EntitySet.entity_from_dataframe#1": { + "dataframe": "readings" + }, + "featuretools.EntitySet.entity_from_dataframe#2": { + "dataframe": "turbines" + } + }, + "output_names": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + } + } +} diff --git a/greenguard/pipelines/resample_600s_unstack_double_144_lstm_timeseries_classifier.json b/greenguard/pipelines/resample_600s_unstack_double_144_lstm_timeseries_classifier.json new file mode 100644 index 0000000..368dd4d --- /dev/null +++ b/greenguard/pipelines/resample_600s_unstack_double_144_lstm_timeseries_classifier.json @@ -0,0 +1,119 @@ +{ + "primitives": [ + "pandas.DataFrame.resample", + "pandas.DataFrame.unstack", + "pandas.DataFrame.pop", + "pandas.DataFrame.pop", + "sklearn.impute.SimpleImputer", + "sklearn.preprocessing.MinMaxScaler", + "pandas.DataFrame", + "pandas.DataFrame.set", + "pandas.DataFrame.set", + "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences", + "keras.Sequential.DoubleLSTMTimeSeriesClassifier" + ], + "init_params": { + "pandas.DataFrame.resample#1": { + "rule": "600s", + "on": "timestamp", + "groupby": [ + "turbine_id", + "signal_id" + ], + "aggregation": "mean", + "reset_index": false + }, + "pandas.DataFrame.unstack#1": { + "level": "signal_id", + "reset_index": true + }, + "pandas.DataFrame.pop#1": { + "item": "turbine_id" + }, + "pandas.DataFrame.pop#2": { + "item": "timestamp" + }, + "sklearn.preprocessing.MinMaxScaler#1": { + "feature_range": [ + -1, + 1 + ] + }, + "pandas.DataFrame#1": { + "index": null, + "columns": null + }, + "pandas.DataFrame.set#1": { + "key": "turbine_id" + }, + "pandas.DataFrame.set#2": { + "key": "timestamp" + }, + "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "window_size": 144, + "cutoff_time": "cutoff_time", + "time_index": "timestamp" + }, + "keras.Sequential.DoubleLSTMTimeSeriesClassifier": { + "epochs": 35, + "verbose": false + } + }, + "input_names": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + }, + "pandas.DataFrame.pop#1": { + "X": "readings" + }, + "pandas.DataFrame.pop#2": { + "X": "readings" + }, + "sklearn.impute.SimpleImputer#1": { + "X": "readings" + }, + "sklearn.preprocessing.MinMaxScaler#1": { + 
"X": "readings" + }, + "pandas.DataFrame#1": { + "X": "readings" + }, + "pandas.DataFrame.set#1": { + "X": "readings", + "value": "turbine_id" + }, + "pandas.DataFrame.set#2": { + "X": "readings", + "value": "timestamp" + }, + "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "timeseries": "readings" + } + }, + "output_names": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + }, + "pandas.DataFrame.pop#1": { + "item": "turbine_id" + }, + "pandas.DataFrame.pop#2": { + "item": "timestamp" + }, + "sklearn.impute.SimpleImputer#1": { + "X": "readings" + }, + "sklearn.preprocessing.MinMaxScaler#1": { + "X": "readings" + }, + "pandas.DataFrame#1": { + "X": "readings" + } + } +} diff --git a/greenguard/pipelines/resample_600s_unstack_normalize_dfs_1d_xgb_classifier.json b/greenguard/pipelines/resample_600s_unstack_normalize_dfs_1d_xgb_classifier.json new file mode 100644 index 0000000..b0550ee --- /dev/null +++ b/greenguard/pipelines/resample_600s_unstack_normalize_dfs_1d_xgb_classifier.json @@ -0,0 +1,69 @@ +{ + "primitives": [ + "pandas.DataFrame.resample", + "pandas.DataFrame.unstack", + "featuretools.EntitySet.entity_from_dataframe", + "featuretools.EntitySet.normalize_entity", + "featuretools.dfs", + "mlprimitives.custom.feature_extraction.CategoricalEncoder", + "xgboost.XGBClassifier" + ], + "init_params": { + "pandas.DataFrame.resample#1": { + "rule": "600s", + "on": "timestamp", + "groupby": [ + "turbine_id", + "signal_id" + ], + "aggregation": "mean", + "reset_index": false + }, + "pandas.DataFrame.unstack#1": { + "level": "signal_id", + "reset_index": true + }, + "featuretools.EntitySet.entity_from_dataframe#1": { + "entity_id": "readings", + "index": "reading_id", + "make_index": true, + "time_index": "timestamp" + }, + "featuretools.EntitySet.normalize_entity#1": { + "base_entity_id": "readings", + "new_entity_id": "turbines", + "index": "turbine_id", + "make_time_index": false + }, + "featuretools.dfs#1": { + "target_entity": "turbines", + "index": "turbine_id", + "time_index": "cutoff_time", + "encode": false, + "max_depth": -1, + "copy": true, + "verbose": true, + "n_jobs": 1, + "training_window": "1d" + } + }, + "input_names": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + }, + "featuretools.EntitySet.entity_from_dataframe#1": { + "dataframe": "readings" + } + }, + "output_names": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + } + } +} diff --git a/greenguard/pipelines/unstacked_dfs_xgb_classifier.json b/greenguard/pipelines/unstacked/unstacked_dfs_xgb_classifier.json similarity index 100% rename from greenguard/pipelines/unstacked_dfs_xgb_classifier.json rename to greenguard/pipelines/unstacked/unstacked_dfs_xgb_classifier.json diff --git a/greenguard/pipelines/unstacked_double_lstm_timeseries_classifier.json b/greenguard/pipelines/unstacked/unstacked_double_lstm_timeseries_classifier.json similarity index 100% rename from greenguard/pipelines/unstacked_double_lstm_timeseries_classifier.json rename to greenguard/pipelines/unstacked/unstacked_double_lstm_timeseries_classifier.json diff --git a/greenguard/pipelines/unstacked_lstm_timeseries_classifier.json b/greenguard/pipelines/unstacked/unstacked_lstm_timeseries_classifier.json similarity index 100% rename from greenguard/pipelines/unstacked_lstm_timeseries_classifier.json rename to 
greenguard/pipelines/unstacked/unstacked_lstm_timeseries_classifier.json diff --git a/greenguard/pipelines/unstacked_normalize_dfs_xgb_classifier.json b/greenguard/pipelines/unstacked/unstacked_normalize_dfs_xgb_classifier.json similarity index 100% rename from greenguard/pipelines/unstacked_normalize_dfs_xgb_classifier.json rename to greenguard/pipelines/unstacked/unstacked_normalize_dfs_xgb_classifier.json diff --git a/setup.py b/setup.py index 8f7de72..5893f14 100644 --- a/setup.py +++ b/setup.py @@ -16,9 +16,9 @@ history = '' install_requires = [ - 'baytune>=0.2.3,<0.3', 'mlblocks>=0.3.4,<0.4', - 'mlprimitives>=0.2.3,<0.3', + 'mlprimitives>=0.2.4,<0.3', + 'baytune>=0.2.3,<0.3', 'numpy>=1.15.4,<1.17', 'pymongo>=3.7.2,<4', 'scikit-learn>=0.20.1,<0.21', @@ -76,7 +76,6 @@ 'License :: OSI Approved :: MIT License', 'Natural Language :: English', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', ], @@ -98,7 +97,7 @@ long_description_content_type='text/markdown', name='greenguard', packages=find_packages(include=['greenguard', 'greenguard.*']), - python_requires='>=3.5', + python_requires='>=3.6', setup_requires=setup_requires, test_suite='tests', tests_require=tests_require, From 462b9e1ac989ae66e541a3cf0f15a15d8211061c Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Mon, 3 Feb 2020 12:57:47 -0500 Subject: [PATCH 005/171] Fix input format and add demo functions --- greenguard/__init__.py | 1 - greenguard/demo.py | 36 +++++++++++++++++-- greenguard/demo.py.new | 66 ++++++++++++++++++++++++++++++++++ greenguard/loaders/__init__.py | 1 - greenguard/loaders/csv.py | 2 +- greenguard/pipeline.py | 32 +++++++++-------- tests/test_pipeline.py | 48 ++++++++++++++----------- 7 files changed, 145 insertions(+), 41 deletions(-) create mode 100644 greenguard/demo.py.new diff --git a/greenguard/__init__.py b/greenguard/__init__.py index 1eab417..c530d4e 100644 --- a/greenguard/__init__.py +++ b/greenguard/__init__.py @@ -18,5 +18,4 @@ __all__ = ( 'GreenGuardPipeline', 'get_pipelines', - 'load_demo' ) diff --git a/greenguard/demo.py b/greenguard/demo.py index 369422b..789a50d 100644 --- a/greenguard/demo.py +++ b/greenguard/demo.py @@ -27,13 +27,43 @@ def _load_or_download(filename, dates): return data -def load_demo(): +def load_demo(load_readings=True): """Load the demo included in the GreenGuard project. + The first time that this function is executed, the data will be downloaded and cached inside the `greenguard/demo` folder. Subsequent calls will load the cached data instead of downloading it again. + + Returns: + tuple[pandas.DataFrame]: + target_times and readings tables """ target_times = _load_or_download('target_times', 'cutoff_time') - readings = _load_or_download('readings', 'timestamp') + if load_readings: + readings = _load_or_download('readings', 'timestamp') + return target_times, readings + + return target_times + + +def generate_raw_readings(output_path='demo'): + """Generate raw readings based on the demo data. + + Args: + path (str): + Path where the readings will be generated. 
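+
+        The generated layout (illustrative) is one folder per turbine with
+        one CSV file per month:
+
+            demo/
+                T001/
+                    2013-01-.csv
+                    2013-02-.csv
+                    ...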
+ """ + target_times, readings = load_demo() + + for turbine_id in target_times.turbine_id.unique(): + turbine_path = os.path.join(output_path, turbine_id) + os.makedirs(turbine_path, exist_ok=True) + data = readings[readings.turbine_id == turbine_id] + for month in range(1, 13): + month_data = data[data.timestamp.dt.month == month].copy() + month_data['timestamp'] = month_data['timestamp'].dt.strftime('%m/%d/%y %M:%H:%S') + month_path = os.path.join(turbine_path, '2013-{:02d}-.csv'.format(month)) + LOGGER.info('Generating file %s', month_path) + month_data.to_csv(month_path, index=False) - return target_times, readings + return target_times diff --git a/greenguard/demo.py.new b/greenguard/demo.py.new new file mode 100644 index 0000000..62a9eb1 --- /dev/null +++ b/greenguard/demo.py.new @@ -0,0 +1,66 @@ +import os +import random +from datetime import datetime, timedelta + +import pandas as pd + + +def get_turbine_df(start, end, interval, signals): + data = list() + current = start + delta = timedelta(seconds=interval) + while current < end: + for signal in signals: + data.append({ + 'timestamp': current.strftime('%m/%d/%y %H:%M:%S'), + 'signal_id': signal, + 'value': random.random() + }) + + current = current + delta + + return pd.DataFrame(data)[['timestamp', 'signal_id', 'value']] + + +def generate_turbine_files(data_path, turbine_name, signals, interval): + turbine_path = os.path.join(data_path, turbine_name) + os.makedirs(turbine_path, exist_ok=True) + + for year in range(2000, 2011): + for month in range(1, 13): + start = datetime(year, month, 1) + end = datetime(year + (1 if month == 12 else 0), (month % 12) + 1, 1) + tdf = get_turbine_df(start, end, interval, signals) + + csv_path = os.path.join(turbine_path, '{}-{:02d}-.csv'.format(year, month)) + tdf.to_csv(csv_path) + +def _prefixed_range(prefix, size): + arr = pd.Series(np.arange(size) + 1).astype(str) + arr = arr.str.zfill(arr.str.len().max()) + + return prefix + arr + + +def make_demo(path='.', signals=1, turbines=1, interval=600): + signals = _prefixed_range('S', signals) + turbines = _prefixed_range('S', turbines) + readings_path = os.path.join(path, readings) + + for turbine in turbines: + generate_turbine_files(readings_path, turbine, signals, interval) + + + target_times = pd.DataFrame([ + {'turbine_id': 'T001', 'cutoff_time': datetime(2005, 1, 1), 'target': False}, + {'turbine_id': 'T001', 'cutoff_time': datetime(2007, 1, 2), 'target': True}, + {'turbine_id': 'T001', 'cutoff_time': datetime(2009, 1, 2), 'target': False}, + {'turbine_id': 'T002', 'cutoff_time': datetime(2005, 1, 1), 'target': True}, + {'turbine_id': 'T002', 'cutoff_time': datetime(2007, 1, 2), 'target': False}, + {'turbine_id': 'T002', 'cutoff_time': datetime(2009, 1, 2), 'target': True}, + {'turbine_id': 'T003', 'cutoff_time': datetime(2005, 1, 1), 'target': False}, + {'turbine_id': 'T003', 'cutoff_time': datetime(2007, 1, 2), 'target': True}, + {'turbine_id': 'T003', 'cutoff_time': datetime(2009, 1, 2), 'target': False}, + ]) + + target_times.to_csv('target_times.csv', index=False) diff --git a/greenguard/loaders/__init__.py b/greenguard/loaders/__init__.py index 169c687..0113f15 100644 --- a/greenguard/loaders/__init__.py +++ b/greenguard/loaders/__init__.py @@ -1,6 +1,5 @@ from greenguard.loaders.csv import CSVLoader - __all__ = ( 'CSVLoader', ) diff --git a/greenguard/loaders/csv.py b/greenguard/loaders/csv.py index 1d4bf9c..a2db438 100644 --- a/greenguard/loaders/csv.py +++ b/greenguard/loaders/csv.py @@ -36,7 +36,7 @@ class CSVLoader: Only used 
diff --git a/greenguard/loaders/__init__.py b/greenguard/loaders/__init__.py
index 169c687..0113f15 100644
--- a/greenguard/loaders/__init__.py
+++ b/greenguard/loaders/__init__.py
@@ -1,6 +1,5 @@
 from greenguard.loaders.csv import CSVLoader
 
-
 __all__ = (
     'CSVLoader',
 )
diff --git a/greenguard/loaders/csv.py b/greenguard/loaders/csv.py
index 1d4bf9c..a2db438 100644
--- a/greenguard/loaders/csv.py
+++ b/greenguard/loaders/csv.py
@@ -36,7 +36,7 @@ class CSVLoader:
             Only used
             when resampling. Defaults to ``False``.
     """
 
-    DEFAULT_DATETIME_FMT = '%Y-%m-%dT%M:%H:%S'
+    DEFAULT_DATETIME_FMT = '%m/%d/%y %H:%M:%S'
     DEFAULT_FILENAME_FMT = '%Y-%m-.csv'
 
     def __init__(self, readings_path='.', rule=None, aggregation='mean', unstack=False,
diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py
index 509f766..9783052 100644
--- a/greenguard/pipeline.py
+++ b/greenguard/pipeline.py
@@ -405,15 +405,13 @@ def _get_tuner(self):
 
         return tuner
 
-    def tune(self, X=None, y=None, readings=None, iterations=10):
+    def tune(self, target_times=None, readings=None, iterations=10):
         """Tune this pipeline for the indicated number of iterations.
 
         Args:
-            X (pandas.DataFrame):
-                ``target_times`` data, without the ``target`` column.
-                Only needed if the splits have not been previously computed.
-            y (pandas.Series or numpy.ndarray):
-                ``target`` vector corresponding to the passed ``target_times``.
+            target_times (pandas.DataFrame):
+                ``target_times`` table, containing the ``turbine_id``, ``cutoff_time``
+                and ``target`` columns.
                 Only needed if the splits have not been previously computed.
             readings (pandas.DataFrame):
                 ``readings`` table. Only needed if the splits have not been
@@ -423,6 +421,8 @@
         """
         if not self._tuner:
             LOGGER.info('Scoring the default pipeline')
+            X = target_times[['turbine_id', 'cutoff_time']]
+            y = target_times['target']
             self.cv_score = self.cross_validate(X, y, readings)
 
             LOGGER.info('Default Pipeline score: %s', self.cv_score)
@@ -451,27 +451,28 @@
             LOGGER.exception("Caught an exception scoring pipeline %s with params:\n%s",
                              i + 1, failed)
 
-    def fit(self, X, y, readings):
+    def fit(self, target_times, readings):
         """Fit this pipeline to the given data.
 
         Args:
-            X (pandas.DataFrame):
-                ``target_times`` data, without the ``target`` column.
-            y (pandas.Series or numpy.ndarray):
-                ``target`` vector corresponding to the passed ``target_times``.
+            target_times (pandas.DataFrame):
+                ``target_times`` table, containing the ``turbine_id``, ``cutoff_time``
+                and ``target`` columns.
             readings (pandas.DataFrame):
                 ``readings`` table.
         """
+        X = target_times[['turbine_id', 'cutoff_time']]
+        y = target_times['target']
         self._pipeline.fit(X, y, readings=readings)
         self.fitted = True
 
-    def predict(self, X, readings):
+    def predict(self, target_times, readings):
         """Make predictions using this pipeline.
 
         Args:
-            X (pandas.DataFrame):
-                ``target_times`` data, containing the ``turbine_id`` and
-                the ``cutoff_time`` column.
+            target_times (pandas.DataFrame):
+                ``target_times`` table, containing the ``turbine_id`` and
+                ``cutoff_time`` columns.
             readings (pandas.DataFrame):
                 ``readings`` table.
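The net effect of these signature changes is that callers now pass the whole `target_times` table and the pipeline extracts `X` and `y` internally. A minimal sketch of the new call pattern, using the demo data and one of the pipeline names bundled in this patch series:

```python
from greenguard.demo import load_demo
from greenguard.pipeline import GreenGuardPipeline

target_times, readings = load_demo()

pipeline = GreenGuardPipeline('resample_600s_unstack_normalize_dfs_1d_xgb_classifier', 'f1')

# tune, fit and predict now all take the full target_times table
pipeline.tune(target_times, readings, iterations=5)
pipeline.fit(target_times, readings)
predictions = pipeline.predict(target_times, readings)
```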
@@ -482,6 +483,7 @@ def predict(self, X, readings): if not self.fitted: raise NotFittedError() + X = target_times[['turbine_id', 'cutoff_time']] return self._pipeline.predict(X, readings=readings) def save(self, path): diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 80a9167..541ad6f 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -5,39 +5,47 @@ from unittest import TestCase from unittest.mock import patch -from mlblocks.discovery import find_pipelines, load_pipeline +import pandas as pd from greenguard.pipeline import GreenGuardPipeline class TestGreenGuardPipeline(TestCase): - """Tests for `TimeSeriesClassifier`.""" - PIPELINE_NAME = find_pipelines()[0] + def _get_data(self): + target_times = pd.DataFrame({ + 'turbine_id': ['T001'], + 'cutoff_time': [pd.Timestamp('2010-01-01')], + 'target': [1] + }) + readings = pd.DataFrame({ + 'turbine_id': ['T001'], + 'timestamp': [pd.Timestamp('2010-01-01')], + 'signal_id': ['S1'], + 'value': [0.1] + }) + return target_times, readings @patch('greenguard.pipeline.MLPipeline') - def test_fit(self, pipeline_class_mock): - """fit prepare the pipeline to make predictions based on the given data.""" + @patch('greenguard.pipeline.load_pipeline') + def test_fit(self, load_pipeline_mock, mlpipeline_mock): + load_pipeline_mock.return_value = dict() + # Run - instance = GreenGuardPipeline(self.PIPELINE_NAME, 'accuracy') - instance.fit('an_X', 'a_y', 'readings') + instance = GreenGuardPipeline('a_pipeline', 'accuracy') + target_times, readings = self._get_data() + instance.fit(target_times, readings) # Asserts - pipeline_mock = pipeline_class_mock.return_value - pipeline_class_mock.assert_called_once_with(load_pipeline(self.PIPELINE_NAME)) - assert instance._pipeline == pipeline_mock - - pipeline_mock.fit.assert_called_once_with('an_X', 'a_y', readings='readings') - assert instance.fitted @patch('greenguard.pipeline.MLPipeline') - def test_predict(self, pipeline_mock): - """predict produces results using the pipeline.""" + @patch('greenguard.pipeline.load_pipeline') + def test_predict(self, load_pipeline_mock, mlpipeline_mock): + load_pipeline_mock.return_value = dict() + # Run - instance = GreenGuardPipeline(self.PIPELINE_NAME, 'accuracy') + instance = GreenGuardPipeline('a_pipeline', 'accuracy') instance.fitted = True - instance.predict('an_X', 'readings') - - # Asserts - pipeline_mock.return_value.predict.assert_called_once_with('an_X', readings='readings') + target_times, readings = self._get_data() + instance.predict(target_times, readings) From 35df088a761b6312709cefcbd1a82e5d38c4a88d Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Mon, 3 Feb 2020 12:58:40 -0500 Subject: [PATCH 006/171] Update readme and examples --- .dockerignore | 1 + README.md | 488 ++++---------- notebooks/CSVLoader Demo.ipynb | 683 ++++++++++++++++++++ notebooks/GreenGuard usage example.ipynb | 779 +++++++++++------------ 4 files changed, 1180 insertions(+), 771 deletions(-) create mode 100644 .dockerignore create mode 100644 notebooks/CSVLoader Demo.ipynb diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..d8e7acb --- /dev/null +++ b/.dockerignore @@ -0,0 +1 @@ +notebooks-private/ diff --git a/README.md b/README.md index 169386f..cc0a5e0 100644 --- a/README.md +++ b/README.md @@ -19,19 +19,19 @@ AutoML for Renewable Energy Industries. 
# GreenGuard -- Free software: MIT license +- License: [MIT](https://github.com/D3-AI/GreenGuard/blob/master/LICENSE) - Documentation: https://D3-AI.github.io/GreenGuard - Homepage: https://github.com/D3-AI/GreenGuard # Overview The GreenGuard project is a collection of end-to-end solutions for machine learning problems -commonly -found in monitoring wind energy production systems. Most tasks utilize sensor data +commonly found in monitoring wind energy production systems. Most tasks utilize sensor data emanating from monitoring systems. We utilize the foundational innovations developed for automation of machine Learning at Data to AI Lab at MIT. The salient aspects of this customized project are: + * A set of ready to use, well tested pipelines for different machine learning tasks. These are vetted through testing across multiple publicly available datasets for the same task. * An easy interface to specify the task, pipeline, and generate results and summarize them. @@ -41,16 +41,58 @@ The salient aspects of this customized project are: * A robust continuous integration and testing infrastructure. * A ``learning database`` recording all past outcomes --> tasks, pipelines, outcomes. +# Requirements + +**GreenGuard** has been developed and runs on Python 3.6 and 3.7. + +Also, although it is not strictly required, the usage of a [virtualenv]( +https://virtualenv.pypa.io/en/latest/) is highly recommended in order to avoid interfering +with other software installed in the system where you are trying to run **GreenGuard**. + +# Install + +**GreenGuard** can be installed locally using [pip](https://pip.pypa.io/en/stable/) with +the following command: + +```bash +pip install greenguard +``` + +This will pull and install the latest stable release from [PyPi](https://pypi.org/). + +If you want to install from source or contribute to the project please read the +[Contributing Guide](https://d3-ai.github.io/GreenGuard/contributing.html#get-started). + # Data Format -In order to be able to use the **GreenGuard Pipelines** to make predictions over you -time Series data, you will need to following tables, formatted as CSV files: +The input expected by the **GreenGuard** system consists of the following two elements, +which need to be passed as `pandas.DataFrame` objects: + +## Target Times + +A table containing the specification of the problem that we are solving, which has three +columns: + +* `turbine_id`: Unique identifier of the turbine which this label corresponds to. +* `cutoff_time`: Time associated with this target +* `target`: The value that we want to predict. This can either be a numerical value or a + categorical label. This column can also be skipped when preparing data that will be used + only to make predictions and not to fit any pipeline. + +| | turbine_id | cutoff_time | target | +|----|--------------|---------------------|----------| +| 0 | T1 | 2001-01-02 00:00:00 | 0 | +| 1 | T1 | 2001-01-03 00:00:00 | 1 | +| 2 | T2 | 2001-01-04 00:00:00 | 0 | + +## Readings + +A table containing the signal data from the different sensors, with the following columns: -* A **Readings** table that contains: * `turbine_id`: Unique identifier of the turbine which this reading comes from. * `signal_id`: Unique identifier of the signal which this reading comes from. - * `timestamp`: Time where the reading took place, as an ISO formatted datetime. - * `value`: Numeric value of this reading. + * `timestamp (datetime)`: Time where the reading took place, as a datetime. 
+ * `value (float)`: Numeric value of this reading. | | turbine_id | signal_id | timestamp | value | |----|--------------|-------------|---------------------|---------| @@ -67,213 +109,80 @@ time Series data, you will need to following tables, formatted as CSV files: | 10 | T1 | S2 | 2001-01-03 00:00:00 | 11 | | 11 | T1 | S2 | 2001-01-03 12:00:00 | 12 | -* A **Target times** table that contains: - * `turbine_id`: Unique identifier of the turbine which this label corresponds to. - * `cutoff_time`: Time associated with this target - * `target`: The value that we want to predict. This can either be a numerical value or a - categorical label. This column can also be skipped when preparing data that will be used - only to make predictions and not to fit any pipeline. - -| | turbine_id | cutoff_time | target | -|----|--------------|---------------------|----------| -| 0 | T1 | 2001-01-02 00:00:00 | 0 | -| 1 | T1 | 2001-01-03 00:00:00 | 1 | -| 2 | T1 | 2001-01-04 00:00:00 | 0 | - -Additionally, if available, two more tables can be passed alongside the previous ones in order -to provide additional information about the turbines and signals. - -* A **Turbines** table that contains a `turbine_id` and additional properties about each turbine - -| | turbine_id | latitude | longitude | height | manufacturer | -|----|--------------|------------|-------------|----------|----------------| -| 0 | T1 | 49.8729 | -6.44571 | 23.435 | M1 | -| 1 | T2 | 49.8729 | -6.4457 | 24.522 | M1 | -| 2 | T3 | 49.8729 | -6.44565 | 23.732 | M2 | - -* A **Signals** table that contains a `signal_id` and additional properties about each signal - -| | signal_id | sensor_type | sensor_brand | sensitivity | -|----|-------------|---------------|----------------|---------------| -| 0 | S1 | t1 | b1 | 200 | -| 1 | S2 | t2 | b2 | 500 | - -## Demo Dataset - -For development and demonstration purposes, we include a dataset with data from several telemetry -signals associated with one wind energy production turbine. - -This data, which has been already formatted as expected by the GreenGuard Pipelines, can be -browsed and downloaded directly from the -[d3-ai-greenguard AWS S3 Bucket](https://d3-ai-greenguard.s3.amazonaws.com/index.html). - -This dataset is adapted from the one used in the project by Cohen, Elliot J., -"Wind Analysis." Joint Initiative of the ECOWAS Centre for Renewable Energy and Energy Efficiency -(ECREEE), The United Nations Industrial Development Organization (UNIDO) and the Sustainable -Engineering Lab (SEL). Columbia University, 22 Aug. 2014. -[Available online here](https://github.com/Ecohen4/ECREEE) - -The complete list of manipulations performed on the original dataset to convert it into the -demo one that we are using here is exhaustively shown and explained in the -[Green Guard Demo Data notebook](notebooks/Green%20Guard%20Demo%20Data.ipynb). +## CSV Format -# Concepts - -Before diving into the software usage, we briefly explain some concepts and terminology. - -## Primitive - -We call the smallest computational blocks used in a Machine Learning process -**primitives**, which: - -* Can be either classes or functions. -* Have some initialization arguments, which MLBlocks calls `init_params`. -* Have some tunable hyperparameters, which have types and a list or range of valid values. - -## Template - -Primitives can be combined to form what we call **Templates**, which: - -* Have a list of primitives. -* Have some initialization arguments, which correspond to the initialization arguments - of their primitives. 
-* Have some tunable hyperparameters, which correspond to the tunable hyperparameters
-  of their primitives.
-
-## Pipeline
-
-Templates can be used to build **Pipelines** by taking and fixing a set of valid
-hyperparameters for a Template. Hence, Pipelines:
-
-* Have a list of primitives, which corresponds to the list of primitives of their template.
-* Have some initialization arguments, which correspond to the initialization arguments
-  of their template.
-* Have some hyperparameter values, which fall within the ranges of valid tunable
-  hyperparameters of their template.
-
-A pipeline can be fitted and evaluated using the MLPipeline API in MLBlocks.
-
-## Tuning
-
-We call tuning the process of, given a dataset and a template, find the pipeline derived from the
-given template that gets the best possible score on the given dataset.
-
-This process usually involves fitting and evaluating multiple pipelines with different hyperparameter
-values on the same data while using optimization algorithms to deduce which hyperparameters are more
-likely to get the best results in the next iterations.
-
-We call each one of these tries a **tuning iteration**.
-
-# Current tasks and pipelines
-
-In our current phase, we are addressing two tasks - time series classification and time series
-regression. To provide solutions for these two tasks we have two components.
-
-## GreenGuardPipeline
-
-This class is the one in charge of learning from the data and making predictions by building
-[MLBlocks](https://hdi-project.github.io/MLBlocks) pipelines and later on tuning them using
-[BTB](https://hdi-project.github.io/BTB/)
-
-## GreenGuardLoader
-
-A class responsible for loading the time series data from CSV files, and return it in the
-format ready to be used by the **GreenGuardPipeline**.
-
-# Install
-
-## Requirements
-
-**GreenGuard** has been developed and runs on Python 3.5, 3.6 and 3.7.
-
-Also, although it is not strictly required, the usage of a [virtualenv](https://virtualenv.pypa.io/en/latest/)
-is highly recommended in order to avoid interfering with other software installed in the system
-where you are trying to run **GreenGuard**.
-
-## Installation
-
-The simplest and recommended way to install **GreenGuard** is using pip:
-
-```bash
-pip install greenguard
-```
-
-For development, you can also clone the repository and install it from sources
-
-```bash
-git clone git@github.com:D3-AI/GreenGuard.git
-cd GreenGuard
-make install-develop
-```
+Apart from the in-memory data format explained above, which is limited by the memory
+allocation capabilities of the system where it is run, **GreenGuard** is also prepared to
+load and work with data stored as a collection of CSV files, drastically increasing the amount
+of data which it can work with. Further details about this format can be found in the
+[project documentation site](https://D3-AI.github.io/GreenGuard/).
 
 # Quickstart
 
-In this example we will load some demo data using the **GreenGuardLoader** and fetch it to the
-**GreenGuardPipeline** for it to find the best possible pipeline, fit it using the given data
-and then make predictions from it.
+In this example we will load some demo data and classify it using a **GreenGuard Pipeline**.
 
-## 1. Load and explore the data
+## 1. Load and split the demo data
 
 The first step is to load the demo data.
-For this, we will import and call the `greenguard.loader.load_demo` function without any arguments:
+For this, we will import and call the `greenguard.demo.load_demo` function without any arguments:
 
 ```python
-from greenguard.loader import load_demo
+from greenguard.demo import load_demo
 
-X, y, readings = load_demo()
+target_times, readings = load_demo()
 ```
 
 The returned objects are:
 
-`X`: A `pandas.DataFrame` with the `target_times` table data without the `target` column.
+* ``target_times``: A ``pandas.DataFrame`` with the ``target_times`` table data:
 
-```
-   turbine_id  timestamp
-0          T1 2013-01-01
-1          T1 2013-01-02
-2          T1 2013-01-03
-3          T1 2013-01-04
-4          T1 2013-01-05
-```
+  ```
+    turbine_id cutoff_time  target
+  0       T001  2013-01-12       0
+  1       T001  2013-01-13       0
+  2       T001  2013-01-14       0
+  3       T001  2013-01-15       1
+  4       T001  2013-01-16       0
+  ```
 
-`y`: A `pandas.Series` with the `target` column from the `target_times` table.
+* ``readings``: A ``pandas.DataFrame`` containing the time series data in the format explained above.
 
-```
-0    0.0
-1    0.0
-2    0.0
-3    0.0
-4    0.0
-Name: target, dtype: float64
-```
+  ```
+    turbine_id signal_id  timestamp  value
+  0       T001       S01 2013-01-10  323.0
+  1       T001       S02 2013-01-10  320.0
+  2       T001       S03 2013-01-10  284.0
+  3       T001       S04 2013-01-10  348.0
+  4       T001       S05 2013-01-10  273.0
+  ```
 
-`readings`: A `pandas.DataFrame` containing the time series data in the format explained above.
+Once we have loaded the `target_times`, and before proceeding to train any Machine Learning
+Pipeline, we will split them into two partitions for training and testing.
 
-```
-   turbine_id signal_id  timestamp  value
-0          T1        S1 2013-01-01  817.0
-1          T1        S2 2013-01-01  805.0
-2          T1        S3 2013-01-01  786.0
-3          T1        S4 2013-01-01  809.0
-4          T1        S5 2013-01-01  755.0
-```
+In this case, we will split them using the [train_test_split function from scikit-learn](
+https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html),
+but it can be done with any other suitable tool.
 
-## 2. Split the data
+```python
+from sklearn.model_selection import train_test_split
 
-If we want to split the data in train and test subsets, we can do so by splitting the
-`X` and `y` variables with any suitable tool.
+train, test = train_test_split(target_times, test_size=0.25, random_state=0)
+```
 
-In this case, we will do it using the [train_test_split function from scikit-learn](
-https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html).
+Notice how we are only splitting the `target_times` data and not the `readings`.
+This is because the pipelines will later on take care of selecting the parts of the
+`readings` table needed for the training based on the information found inside
+the `train` and `test` inputs.
 
-```python
-from sklearn.model_selection import train_test_split
+Additionally, if we want to calculate a goodness-of-fit score later on, we can separate the
+testing target values from the `test` table by popping them from it:
 
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)
+```python
+test_targets = test.pop('target')
 ```
 
-## 3. Finding a Pipeline
+## 2. Exploring the available Pipelines
 
 Once we have the data ready, we need to find a suitable pipeline.
 
@@ -286,207 +195,68 @@ from greenguard import get_pipelines
 pipelines = get_pipelines()
 ```
 
-The returned `pipeline` variable will be `dict` containing the names of all the pipelines
-available and their paths:
-
-```
-'greenguard_classification'
-'greenguard_regression'
-```
-
-## 3. 
Finding the best Pipeline - -Once we have loaded the data, we create a **GreenGuardPipeline** instance by passing: - -* `template (string)`: the name of a template or the path to a template json file. -* `metric (string or function)`: The name of the metric to use or a metric function to use. -* `cost (bool)`: Whether the metric is a cost function to be minimized or a score to be maximized. - -Optionally, we can also pass defails about the cross validation configuration: - -* `stratify` -* `cv_splits` -* `shuffle` -* `random_state` - -In this case, we will be loading the `greenguard_classification` pipeline, using -the `accuracy` metric, and using only 2 cross validation splits: - -```python -from greenguard.pipeline import GreenGuardPipeline - -pipeline = GreenGuardPipeline( - template='greenguard_classification', - metric='f1_macro', - cv_splits=5 -) -``` - -Once we have created the pipeline, we can call its `tune` method to find the best possible -hyperparameters for our data, passing the `X`, `y`, and `readings` variables returned by the loader, -as well as an indication of the number of tuning iterations that we want to perform. - -```python -pipeline.tune(X_train, y_train, readings, iterations=10) -``` - -After the tuning process has finished, the hyperparameters have been already set in the classifier. - -We can see the found hyperparameters by calling the `get_hyperparameters` method, - -```python -pipeline.get_hyperparameters() -``` - -which will return a dictionary with the best hyperparameters found so far: - -``` -{ - "pandas.DataFrame.resample#1": { - "rule": "1D", - "time_index": "timestamp", - "groupby": [ - "turbine_id", - "signal_id" - ], - "aggregation": "mean" - }, - "pandas.DataFrame.unstack#1": { - "level": "signal_id", - "reset_index": true - }, - ... -``` - -as well as the obtained cross validation score by looking at the `score` attribute of the -`pipeline` object: +The returned `pipeline` variable will be `list` containing the names of all the pipelines +available in the GreenGuard system: -```python -pipeline.score # -> 0.6447509660798626 ``` - -**NOTE**: If the score is not good enough, we can call the `tune` method again as many times -as needed and the pipeline will continue its tuning process every time based on the previous -results! - -## 4. Fitting the pipeline - -Once we are satisfied with the obtained cross validation score, we can proceed to call -the `fit` method passing again the same data elements. - -This will fit the pipeline with all the training data available using the best hyperparameters -found during the tuning process: - -```python -pipeline.fit(X_train, y_train, readings) +['resample_600s_normalize_dfs_1d_xgb_classifier', + 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier', + 'resample_600s_unstack_double_144_lstm_timeseries_classifier', + 'resample_3600s_unstack_24_lstm_timeseries_classifier', + 'resample_3600s_unstack_double_24_lstm_timeseries_classifier', + 'resample_600s_unstack_dfs_1d_xgb_classifier', + 'resample_600s_unstack_144_lstm_timeseries_classifier'] ``` -## 5. Use the fitted pipeline - -After fitting the pipeline, we are ready to make predictions on new data: +For the rest of this tutorial, we will select and use the pipeline +`resample_600s_unstack_normalize_dfs_1d_xgb_classifier` as our template. ```python -predictions = pipeline.predict(X_test, readings) +pipeline_name = 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier' ``` -And evaluate its prediction performance: +## 3. 
Fitting the Pipeline -```python -from sklearn.metrics import accuracy_score - -accuracy_score(y_test, predictions) # -> 0.6413043478260869 -``` - -## 6. Save and load the pipeline - -Since the tuning and fitting process takes time to execute and requires a lot of data, you -will probably want to save a fitted instance and load it later to analyze new signals -instead of fitting pipelines over and over again. +Once we have loaded the data and selected the pipeline that we will use, we have to +fit it. -This can be done by using the `save` and `load` methods from the `GreenGuardPipeline`. - -In order to save an instance, call its `save` method passing it the path and filename -where the model should be saved. +For this, we will create an instance of a `GreenGuardPipeline` object passing the name +of the pipeline that we want to use: ```python -path = 'my_pipeline.pkl' - -pipeline.save(path) -``` - -Once the pipeline is saved, it can be loaded back as a new `GreenGuardPipeline` by using the -`GreenGuardPipeline.load` method: +from greenguard.pipeline import GreenGuardPipeline -```python -new_pipeline = GreenGuardPipeline.load(path) +pipeline = GreenGuardPipeline(pipeline_name) ``` -Once loaded, it can be directly used to make predictions on new data. +And then we can directly fit it to our data by calling its `fit` method and passing in the +training `target_times` and the complete `readings` table: ```python -new_pipeline.predict(X_test, readings) +pipeline.fit(train, readings) ``` +## 4. Make predictions -# Use your own Dataset - -Once you are familiar with the **GreenGuardPipeline** usage, you will probably want to run it -on your own dataset. - -Here are the necessary steps: - -## 1. Prepare the data - -Firt of all, you will need to prepare your data as 4 CSV files like the ones described in the -[data format](#data-format) section above. - -## 2. Create a GreenGuardLoader - -Once you have the CSV files ready, you will need to import the `greenguard.loader.GreenGuardLoader` -class and create an instance passing: - -* `path - str`: The path to the folder where the 4 CSV files are -* `target_times - str, gptional`: The name of the target table. Defaults to `target_times`. -* `target_column - str, optional`: The name of the target column. Defaults to `target`. -* `readings - str, optional`: The name of the readings table. Defaults to `readings`. -* `turbines - str, optional`: The name of the turbines table. Defaults to `None`. -* `signals - str, optional`: The name of the signals table. Defaults to `None`. -* `gzip - bool, optional`: Set to True if the CSV files are gzipped. Defaults to False. - -For example, here we will be loading a custom dataset which has been sorted in gzip format -inside the `my_dataset` folder, and for which the target table has a different name: +After fitting the pipeline, we are ready to make predictions on new data by calling the +`pipeline.predict` method passing the testing `target_times` and, again, the complete +`readings` table. ```python -from greenguard.loader import GreenGuardLoader - -loader = GreenGuardLoader(path='my_dataset', target='labels', gzip=True) +predictions = pipeline.predict(test, readings) ``` -## 3. Call the loader.load method. +## 5. Evaluate the goodness-of-fit -Once the `loader` instance has been created, we can call its `load` method: +Finally, after making predictions we can evaluate how good the prediction was +using any suitable metric. 
```python -X, y, tables = loader.load() -``` +from sklearn.metrics import f1_score -Optionally, if the dataset contains only data to make predictions and the `target` column -does not exist, we can pass it the argument `False` to skip it: - -```python -X, readings = loader.load(target=False) +f1_score(test_targets, predictions) ``` - -# Docker Usage - -**GreenGuard** comes configured and ready to be distributed and run as a docker image which starts -a jupyter notebook already configured to use greenguard, with all the required dependencies already -installed. - -For more details about how to run GreenGuard over docker, please check the [DOCKER.md](DOCKER.md) -documentation. - ## What's next? For more details about **GreenGuard** and all its possibilities and features, please check the diff --git a/notebooks/CSVLoader Demo.ipynb b/notebooks/CSVLoader Demo.ipynb new file mode 100644 index 0000000..66dab0b --- /dev/null +++ b/notebooks/CSVLoader Demo.ipynb @@ -0,0 +1,683 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import logging;\n", + "\n", + "logging.basicConfig(level=logging.INFO)\n", + "logging.getLogger().setLevel(level=logging.INFO)\n", + "\n", + "import warnings\n", + "warnings.simplefilter(\"ignore\")" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2020-02-03 12:49:43,764 - INFO - demo - Generating file raw_demo/T001/2013-01-.csv\n", + "2020-02-03 12:49:44,277 - INFO - demo - Generating file raw_demo/T001/2013-02-.csv\n", + "2020-02-03 12:49:44,810 - INFO - demo - Generating file raw_demo/T001/2013-03-.csv\n", + "2020-02-03 12:49:45,345 - INFO - demo - Generating file raw_demo/T001/2013-04-.csv\n", + "2020-02-03 12:49:45,885 - INFO - demo - Generating file raw_demo/T001/2013-05-.csv\n", + "2020-02-03 12:49:46,417 - INFO - demo - Generating file raw_demo/T001/2013-06-.csv\n", + "2020-02-03 12:49:46,954 - INFO - demo - Generating file raw_demo/T001/2013-07-.csv\n", + "2020-02-03 12:49:47,492 - INFO - demo - Generating file raw_demo/T001/2013-08-.csv\n", + "2020-02-03 12:49:48,017 - INFO - demo - Generating file raw_demo/T001/2013-09-.csv\n", + "2020-02-03 12:49:48,543 - INFO - demo - Generating file raw_demo/T001/2013-10-.csv\n", + "2020-02-03 12:49:49,094 - INFO - demo - Generating file raw_demo/T001/2013-11-.csv\n", + "2020-02-03 12:49:49,606 - INFO - demo - Generating file raw_demo/T001/2013-12-.csv\n" + ] + } + ], + "source": [ + "from greenguard.demo import generate_raw_readings\n", + "\n", + "target_times = generate_raw_readings('raw_demo')" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(353, 3)" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "target_times.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idcutoff_timetarget
0T0012013-01-120
1T0012013-01-130
2T0012013-01-140
3T0012013-01-151
4T0012013-01-160
\n", + "
" + ], + "text/plain": [ + " turbine_id cutoff_time target\n", + "0 T001 2013-01-12 0\n", + "1 T001 2013-01-13 0\n", + "2 T001 2013-01-14 0\n", + "3 T001 2013-01-15 1\n", + "4 T001 2013-01-16 0" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "target_times.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.3002832861189802" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "target_times.target.mean()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "turbine_id object\n", + "cutoff_time datetime64[ns]\n", + "target int64\n", + "dtype: object" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "target_times.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "from greenguard.loaders import CSVLoader" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2020-02-03 12:50:11,263 - INFO - csv - Loaded 1306052 readings from turbine T001\n", + "2020-02-03 12:50:11,275 - INFO - csv - Loaded 1306052 turbine readings\n", + "2020-02-03 12:50:11,500 - INFO - targets - Dropped 2 invalid targets\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "\n", + "csv_loader = CSVLoader('raw_demo')\n", + "target_times, readings = csv_loader.load(target_times, '1d')" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1306052, 4)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "readings.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idsignal_idtimestampvalue
0T001S012013-01-11209.0
1T001S022013-01-11193.0
2T001S032013-01-11177.0
3T001S042013-01-11188.0
4T001S052013-01-11150.0
\n", + "
" + ], + "text/plain": [ + " turbine_id signal_id timestamp value\n", + "0 T001 S01 2013-01-11 209.0\n", + "1 T001 S02 2013-01-11 193.0\n", + "2 T001 S03 2013-01-11 177.0\n", + "3 T001 S04 2013-01-11 188.0\n", + "4 T001 S05 2013-01-11 150.0" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "readings.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "turbine_id object\n", + "signal_id object\n", + "timestamp datetime64[ns]\n", + "value float64\n", + "dtype: object" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "readings.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(351, 3)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "target_times.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2020-02-03 12:50:31,423 - INFO - csv - Loaded 1306052 readings from turbine T001\n", + "2020-02-03 12:50:31,427 - INFO - csv - Resampling: 4h - mean\n", + "2020-02-03 12:50:31,689 - INFO - csv - Loaded 2119 turbine readings\n", + "2020-02-03 12:50:31,843 - INFO - targets - Dropped 14 invalid targets\n" + ] + } + ], + "source": [ + "csv_loader = CSVLoader('raw_demo', rule='4h', aggregation='mean', unstack=True)\n", + "target_times, readings = csv_loader.load(target_times, '15d')" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(2119, 28)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "readings.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idtimestampvalue_S01value_S02value_S03value_S04value_S05value_S06value_S07value_S08...value_S17value_S18value_S19value_S20value_S21value_S22value_S23value_S24value_S25value_S26
0T0012013-01-10 00:00:00253.041667268.250000268.041667297.166667234.666667261.916667206.7916673.198335e+06...9.0791673.134510e+0642.41666744.95833344.83333349.62500039.20833343.83333334.625293.166667
1T0012013-01-10 04:00:00572.083333555.291667538.666667592.291667557.166667534.000000544.2500003.199514e+06...10.8375003.142505e+0662.08333362.50000063.62500063.54166761.33333362.54166754.000421.208333
2T0012013-01-10 08:00:00688.791667696.791667706.625000750.791667714.250000683.333333658.1666673.201449e+06...12.7541673.155809e+0692.20833394.95833394.66666797.33333394.12500093.58333386.375638.291667
3T0012013-01-10 12:00:00396.333333418.500000415.791667438.541667382.250000364.666667320.3333333.203319e+06...10.9166673.168640e+0655.75000060.08333358.58333361.29166752.79166752.79166744.000376.125000
4T0012013-01-10 16:00:00390.458333408.875000409.500000458.000000415.583333363.000000364.4583333.204504e+06...10.4125003.176672e+0649.95833353.87500054.45833356.75000052.70833346.70833347.625354.750000
\n", + "

5 rows × 28 columns

\n", + "
" + ], + "text/plain": [ + " turbine_id timestamp value_S01 value_S02 value_S03 \\\n", + "0 T001 2013-01-10 00:00:00 253.041667 268.250000 268.041667 \n", + "1 T001 2013-01-10 04:00:00 572.083333 555.291667 538.666667 \n", + "2 T001 2013-01-10 08:00:00 688.791667 696.791667 706.625000 \n", + "3 T001 2013-01-10 12:00:00 396.333333 418.500000 415.791667 \n", + "4 T001 2013-01-10 16:00:00 390.458333 408.875000 409.500000 \n", + "\n", + " value_S04 value_S05 value_S06 value_S07 value_S08 ... \\\n", + "0 297.166667 234.666667 261.916667 206.791667 3.198335e+06 ... \n", + "1 592.291667 557.166667 534.000000 544.250000 3.199514e+06 ... \n", + "2 750.791667 714.250000 683.333333 658.166667 3.201449e+06 ... \n", + "3 438.541667 382.250000 364.666667 320.333333 3.203319e+06 ... \n", + "4 458.000000 415.583333 363.000000 364.458333 3.204504e+06 ... \n", + "\n", + " value_S17 value_S18 value_S19 value_S20 value_S21 value_S22 \\\n", + "0 9.079167 3.134510e+06 42.416667 44.958333 44.833333 49.625000 \n", + "1 10.837500 3.142505e+06 62.083333 62.500000 63.625000 63.541667 \n", + "2 12.754167 3.155809e+06 92.208333 94.958333 94.666667 97.333333 \n", + "3 10.916667 3.168640e+06 55.750000 60.083333 58.583333 61.291667 \n", + "4 10.412500 3.176672e+06 49.958333 53.875000 54.458333 56.750000 \n", + "\n", + " value_S23 value_S24 value_S25 value_S26 \n", + "0 39.208333 43.833333 34.625 293.166667 \n", + "1 61.333333 62.541667 54.000 421.208333 \n", + "2 94.125000 93.583333 86.375 638.291667 \n", + "3 52.791667 52.791667 44.000 376.125000 \n", + "4 52.708333 46.708333 47.625 354.750000 \n", + "\n", + "[5 rows x 28 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "readings.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(337, 3)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "target_times.shape" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/GreenGuard usage example.ipynb b/notebooks/GreenGuard usage example.ipynb index a764885..e912d19 100644 --- a/notebooks/GreenGuard usage example.ipynb +++ b/notebooks/GreenGuard usage example.ipynb @@ -37,8 +37,8 @@ "\n", "The first step is to load the data that we are going to use.\n", "\n", - "In order to use the demo data included in GreenGuard, the `greenguard.loader.load_demo`\n", - "can be used." + "In order to use the demo data included in GreenGuard, the `greenguard.load_demo`\n", + "function can be used." ] }, { @@ -47,9 +47,17 @@ "metadata": {}, "outputs": [], "source": [ - "from greenguard import load_demo\n", + "from greenguard.demo import load_demo\n", "\n", - "X, y, readings = load_demo()" + "target_times, readings = load_demo()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This will download some demo data from our S3 demo Bucket and load it as\n", + "the necessary `target_times` and `readings` tables." 
] }, { @@ -80,45 +88,51 @@ " \n", " turbine_id\n", " cutoff_time\n", + " target\n", " \n", " \n", " \n", " \n", " 0\n", - " GRID1\n", - " 2013-01-01\n", + " T001\n", + " 2013-01-12\n", + " 0\n", " \n", " \n", " 1\n", - " GRID1\n", - " 2013-01-02\n", + " T001\n", + " 2013-01-13\n", + " 0\n", " \n", " \n", " 2\n", - " GRID1\n", - " 2013-01-03\n", + " T001\n", + " 2013-01-14\n", + " 0\n", " \n", " \n", " 3\n", - " GRID1\n", - " 2013-01-04\n", + " T001\n", + " 2013-01-15\n", + " 1\n", " \n", " \n", " 4\n", - " GRID1\n", - " 2013-01-05\n", + " T001\n", + " 2013-01-16\n", + " 0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " turbine_id cutoff_time\n", - "0 GRID1 2013-01-01\n", - "1 GRID1 2013-01-02\n", - "2 GRID1 2013-01-03\n", - "3 GRID1 2013-01-04\n", - "4 GRID1 2013-01-05" + " turbine_id cutoff_time target\n", + "0 T001 2013-01-12 0\n", + "1 T001 2013-01-13 0\n", + "2 T001 2013-01-14 0\n", + "3 T001 2013-01-15 1\n", + "4 T001 2013-01-16 0" ] }, "execution_count": 3, @@ -127,7 +141,7 @@ } ], "source": [ - "X.head()" + "target_times.head()" ] }, { @@ -138,12 +152,7 @@ { "data": { "text/plain": [ - "0 0\n", - "1 0\n", - "2 0\n", - "3 0\n", - "4 0\n", - "Name: target, dtype: int64" + "(353, 3)" ] }, "execution_count": 4, @@ -152,7 +161,7 @@ } ], "source": [ - "y.head()" + "target_times.shape" ] }, { @@ -162,78 +171,11 @@ "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
turbine_idsignal_idtimestampvalue
0GRID1WTG01_possible_power_avg2013-01-01817.0
1GRID1WTG01_total_active_power2013-01-013109970.0
2GRID1WTG02_possible_power_avg2013-01-01805.0
3GRID1WTG02_total_active_power2013-01-01609852.0
4GRID1WTG03_possible_power_avg2013-01-01786.0
\n", - "
" - ], "text/plain": [ - " turbine_id signal_id timestamp value\n", - "0 GRID1 WTG01_possible_power_avg 2013-01-01 817.0\n", - "1 GRID1 WTG01_total_active_power 2013-01-01 3109970.0\n", - "2 GRID1 WTG02_possible_power_avg 2013-01-01 805.0\n", - "3 GRID1 WTG02_total_active_power 2013-01-01 609852.0\n", - "4 GRID1 WTG03_possible_power_avg 2013-01-01 786.0" + "turbine_id object\n", + "cutoff_time datetime64[ns]\n", + "target int64\n", + "dtype: object" ] }, "execution_count": 5, @@ -242,58 +184,13 @@ } ], "source": [ - "readings.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Alternatively, if you want to load your own dataset, the `GreenGuardLoader` class can be used.\n", - "\n", - "For example, in order to load the data from the folder where we just downloaded the demo data\n", - "we can use this commands:" + "target_times.dtypes" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, - "outputs": [], - "source": [ - "from greenguard.loader import GreenGuardLoader\n", - "\n", - "loader = GreenGuardLoader('../greenguard/demo', gzip=True)\n", - "\n", - "X, y, tables = loader.load()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "For further details about the GreenGuardLoder options please check the corresponding\n", - "[API Reference page in the docs](https://d3-ai.github.io/GreenGuard/api/greenguard.loader.html#greenguard.loader.GreenGuardLoader)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The output of either of the previous commands is:\n", - "\n", - "* `X`: A pandas.DataFrame with the contents of the\n", - " target table.\n", - "* `y`: A pandas.Series with the contents of\n", - " the target column.\n", - "* `tables`: A dictionary containing the readings, turbines and\n", - " signals tables as pandas.DataFrames." 
- ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, "outputs": [ { "data": { @@ -317,259 +214,283 @@ " \n", " \n", " turbine_id\n", - " cutoff_time\n", + " signal_id\n", + " timestamp\n", + " value\n", " \n", " \n", " \n", " \n", " 0\n", - " GRID1\n", - " 2013-01-01\n", + " T001\n", + " S01\n", + " 2013-01-10\n", + " 323.0\n", " \n", " \n", " 1\n", - " GRID1\n", - " 2013-01-02\n", + " T001\n", + " S02\n", + " 2013-01-10\n", + " 320.0\n", " \n", " \n", " 2\n", - " GRID1\n", - " 2013-01-03\n", + " T001\n", + " S03\n", + " 2013-01-10\n", + " 284.0\n", " \n", " \n", " 3\n", - " GRID1\n", - " 2013-01-04\n", + " T001\n", + " S04\n", + " 2013-01-10\n", + " 348.0\n", " \n", " \n", " 4\n", - " GRID1\n", - " 2013-01-05\n", + " T001\n", + " S05\n", + " 2013-01-10\n", + " 273.0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " turbine_id cutoff_time\n", - "0 GRID1 2013-01-01\n", - "1 GRID1 2013-01-02\n", - "2 GRID1 2013-01-03\n", - "3 GRID1 2013-01-04\n", - "4 GRID1 2013-01-05" + " turbine_id signal_id timestamp value\n", + "0 T001 S01 2013-01-10 323.0\n", + "1 T001 S02 2013-01-10 320.0\n", + "2 T001 S03 2013-01-10 284.0\n", + "3 T001 S04 2013-01-10 348.0\n", + "4 T001 S05 2013-01-10 273.0" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "X.head()" + "readings.head()" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0 0\n", - "1 0\n", - "2 0\n", - "3 0\n", - "4 0\n", - "Name: target, dtype: int64" + "(1313540, 4)" ] }, - "execution_count": 8, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "y.head()" + "readings.shape" ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "dict_keys(['readings'])" + "turbine_id object\n", + "signal_id object\n", + "timestamp datetime64[ns]\n", + "value float64\n", + "dtype: object" ] }, - "execution_count": 9, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "tables.keys()" + "readings.dtypes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load your own Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatively, if you want to load your own dataset, all you have to do is load the\n", + "`target_times` and `readings` tables as `pandas.DataFrame` objects.\n", + "\n", + "Make sure to parse the corresponding datetime fields!" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "#import pandas as pd\n", + "\n", + "#target_times = pd.read_csv('path/to/your/target_times.csv', parse_dates=['cutoff_time'])\n", + "#readings = pd.read_csv('path/to/your/readings.csv', parse_dates=['timestamp'])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Split the data\n", + "\n", + "Once we have loaded the `target_times` and before proceeding to training any Machine Learning\n", + "Pipeline, we will have split them in 2 partitions for training and testing.\n", + "\n", + "In this case, we will split them using the [train_test_split function from scikit-learn](\n", + "https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html),/n", + "but it can be done with any other suitable tool." 
] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "\n", + "train, test = train_test_split(target_times, test_size=0.25, random_state=0)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Finding a Template\n", + "\n", + "The next step will be to select a template from the ones available in\n", + "GreenGuard.\n", + "\n", + "For this, we can use the `greenguard.get_pipelines` function, which will\n", + "return us the list of all the available MLBlocks pipelines found in the\n", + "GreenGuard system." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
turbine_idsignal_idtimestampvalue
0GRID1WTG01_possible_power_avg2013-01-01817.0
1GRID1WTG01_total_active_power2013-01-013109970.0
2GRID1WTG02_possible_power_avg2013-01-01805.0
3GRID1WTG02_total_active_power2013-01-01609852.0
4GRID1WTG03_possible_power_avg2013-01-01786.0
\n", - "
" - ], "text/plain": [ - " turbine_id signal_id timestamp value\n", - "0 GRID1 WTG01_possible_power_avg 2013-01-01 817.0\n", - "1 GRID1 WTG01_total_active_power 2013-01-01 3109970.0\n", - "2 GRID1 WTG02_possible_power_avg 2013-01-01 805.0\n", - "3 GRID1 WTG02_total_active_power 2013-01-01 609852.0\n", - "4 GRID1 WTG03_possible_power_avg 2013-01-01 786.0" + "['resample_600s_normalize_dfs_1d_xgb_classifier',\n", + " 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier',\n", + " 'resample_600s_unstack_double_144_lstm_timeseries_classifier',\n", + " 'resample_3600s_unstack_24_lstm_timeseries_classifier',\n", + " 'resample_3600s_unstack_double_24_lstm_timeseries_classifier',\n", + " 'resample_600s_unstack_dfs_1d_xgb_classifier',\n", + " 'resample_600s_unstack_144_lstm_timeseries_classifier']" ] }, - "execution_count": 10, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "tables['readings'].head()" + "from greenguard import get_pipelines\n", + "\n", + "get_pipelines()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "## 2. Split the data\n", - "\n", - "If we want to split the data in train and test subsets, we can do so by splitting\n", - "the X and y variables with any suitable tool.\n", - "\n", - "In this case, we will do it using the `train_test_split` function from scikit-learn." + "Optionally, we can pass a string to select the pipelines that contain it:" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['resample_600s_normalize_dfs_1d_xgb_classifier',\n", + " 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier',\n", + " 'resample_600s_unstack_dfs_1d_xgb_classifier']" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "from sklearn.model_selection import train_test_split\n", - "\n", - "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)" + "get_pipelines('dfs')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Additionally, we can pass the keyword `path=True` to obtain a dictionary containing\n", + "also the path to the pipelines instead of only the list of names." 
] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'timeseries_classification': '/home/xals/Projects/MIT/GreenGuard/greenguard/pipelines/timeseries_classification.json',\n", - " 'resample_dfs_xgb_classification': '/home/xals/Projects/MIT/GreenGuard/greenguard/pipelines/resample_dfs_xgb_classification.json',\n", - " 'resample_normalize_dfs_xgb_classification': '/home/xals/Projects/MIT/GreenGuard/greenguard/pipelines/resample_normalize_dfs_xgb_classification.json',\n", - " 'greenguard_regression': '/home/xals/Projects/MIT/GreenGuard/greenguard/pipelines/greenguard_regression.json',\n", - " 'greenguard_classification': '/home/xals/Projects/MIT/GreenGuard/greenguard/pipelines/greenguard_classification.json'}" + "{'resample_600s_normalize_dfs_1d_xgb_classifier': '/home/xals/Projects/MIT/GreenGuard/greenguard/pipelines/resample_600s_normalize_dfs_1d_xgb_classifier.json',\n", + " 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier': '/home/xals/Projects/MIT/GreenGuard/greenguard/pipelines/resample_600s_unstack_normalize_dfs_1d_xgb_classifier.json',\n", + " 'resample_600s_unstack_dfs_1d_xgb_classifier': '/home/xals/Projects/MIT/GreenGuard/greenguard/pipelines/resample_600s_unstack_dfs_1d_xgb_classifier.json'}" ] }, - "execution_count": 7, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "from greenguard import get_pipelines\n", - "\n", - "get_pipelines()" + "get_pipelines('dfs', path=True)" ] }, { - "cell_type": "code", - "execution_count": 8, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "from greenguard import GreenGuardPipeline\n", - "\n", - "pipeline = GreenGuardPipeline('resample_dfs_xgb_classification', 'f1')" + "For the rest of this tutorial, we will select and use the pipeline\n", + "`resample_600s_unstack_normalize_dfs_1d_xgb_classifier` as our template." ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ - "pipeline.fit(X_train, y_train, readings)" + "template = 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier'" ] }, { @@ -589,21 +510,18 @@ "* `stratify`\n", "* `cv_splits`\n", "* `shuffle`\n", - "* `random_state`\n", - "\n", - "In this case, we will be loading the `greenguard_classification` pipeline, using\n", - "the `accuracy` metric, and using only 2 cross validation splits:" + "* `random_state`" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "from greenguard.pipeline import GreenGuardPipeline\n", "\n", - "pipeline = GreenGuardPipeline(template='greenguard_classification', metric='accuracy', cv_splits=2)" + "pipeline = GreenGuardPipeline(template, metric='f1', cv_splits=3)" ] }, { @@ -611,29 +529,59 @@ "metadata": {}, "source": [ "Once we have created the pipeline, we can call its `tune` method to find the best possible\n", - "hyperparameters for our data, passing the `X`, `y`, and `tables` variables returned by the loader,\n", + "hyperparameters for our data, passing the `target_times` and `readings` variables,\n", "as well as an indication of the number of tuning iterations that we want to perform." 
] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "2019-06-18 11:43:31,518 - INFO - pipeline - Scoring the default pipeline\n", - "2019-06-18 11:45:46,250 - INFO - pipeline - Default Pipeline score: 0.6447509660798626\n", - "2019-06-18 11:45:46,252 - INFO - pipeline - Scoring pipeline 1\n", - "2019-06-18 11:45:46,253 - INFO - gp - Using Uniform sampler as user specified r_minimum threshold is not met to start the GP based learning\n", - "2019-06-18 11:48:23,348 - INFO - pipeline - Pipeline 1 score: 0.6813278231000429\n" + "2020-02-03 12:51:46,145 - INFO - pipeline - Scoring the default pipeline\n", + "2020-02-03 12:51:46,147 - INFO - pipeline - Running static steps before cross validation\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Built 165 features\n", + "Elapsed: 00:32 | Progress: 100%|██████████\n", + "Elapsed: 00:16 | Progress: 100%|██████████\n", + "Built 165 features\n", + "Elapsed: 00:33 | Progress: 100%|██████████\n", + "Elapsed: 00:15 | Progress: 100%|██████████\n", + "Built 165 features\n", + "Elapsed: 00:31 | Progress: 100%|██████████\n", + "Elapsed: 00:15 | Progress: 100%|██████████\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2020-02-03 12:54:14,195 - INFO - pipeline - Default Pipeline score: 0.605187908496732\n", + "2020-02-03 12:54:14,196 - INFO - pipeline - Scoring pipeline 1\n", + "2020-02-03 12:54:14,199 - INFO - gp - Using Uniform sampler as user specified r_minimum threshold is not met to start the GP based learning\n", + "2020-02-03 12:54:14,380 - INFO - pipeline - Pipeline 1 score: 0.5976760567286199\n", + "2020-02-03 12:54:14,385 - INFO - pipeline - Scoring pipeline 2\n", + "2020-02-03 12:54:14,888 - INFO - pipeline - Pipeline 2 score: 0.5965798320999443\n", + "2020-02-03 12:54:14,890 - INFO - pipeline - Scoring pipeline 3\n", + "2020-02-03 12:54:15,313 - INFO - pipeline - Pipeline 3 score: 0.6431783902372138\n", + "2020-02-03 12:54:15,316 - INFO - pipeline - Scoring pipeline 4\n", + "2020-02-03 12:54:15,729 - INFO - pipeline - Pipeline 4 score: 0.5642664541017163\n", + "2020-02-03 12:54:15,731 - INFO - pipeline - Scoring pipeline 5\n", + "2020-02-03 12:54:15,883 - INFO - pipeline - Pipeline 5 score: 0.5859328579916815\n" ] } ], "source": [ - "pipeline.tune(X_train, y_train, tables, iterations=1)" + "pipeline.tune(target_times, readings, iterations=5)" ] }, { @@ -648,162 +596,145 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'pandas.DataFrame.resample#1': {},\n", - " 'pandas.DataFrame.unstack#1': {},\n", - " 'featuretools.EntitySet.entity_from_dataframe#1': {},\n", - " 'featuretools.EntitySet.entity_from_dataframe#2': {},\n", - " 'featuretools.EntitySet.entity_from_dataframe#3': {},\n", - " 'featuretools.EntitySet.add_relationship#1': {},\n", - " 'featuretools.dfs#1': {'max_depth': {'type': 'int',\n", - " 'default': 1,\n", - " 'range': [1, 3],\n", - " 'values': None},\n", - " 'remove_low_information': {'type': 'bool', 'default': True}},\n", - " 'mlprimitives.custom.feature_extraction.CategoricalEncoder#1': {'max_labels': {'type': 'int',\n", - " 'default': 0,\n", - " 'range': [0, 100]}},\n", - " 'sklearn.impute.SimpleImputer#1': {'strategy': {'type': 'str',\n", - " 'default': 'mean',\n", - " 'values': ['mean', 'median', 'most_frequent', 'constant']}},\n", - " 
'sklearn.preprocessing.StandardScaler#1': {'with_mean': {'type': 'bool',\n", - " 'default': True},\n", - " 'with_std': {'type': 'bool', 'default': True}},\n", - " 'xgboost.XGBClassifier#1': {'n_estimators': {'type': 'int',\n", - " 'default': 100,\n", - " 'range': [10, 1000]},\n", - " 'max_depth': {'type': 'int', 'default': 3, 'range': [3, 10]},\n", - " 'learning_rate': {'type': 'float', 'default': 0.1, 'range': [0, 1]},\n", - " 'gamma': {'type': 'float', 'default': 0, 'range': [0, 1]},\n", - " 'min_child_weight': {'type': 'int', 'default': 1, 'range': [1, 10]}}}" + "{'mlprimitives.custom.feature_extraction.CategoricalEncoder#1': {'max_labels': 28},\n", + " 'xgboost.XGBClassifier#1': {'n_estimators': 549,\n", + " 'max_depth': 3,\n", + " 'learning_rate': 0.09499856413762053,\n", + " 'gamma': 0.48809516357182936,\n", + " 'min_child_weight': 7}}" ] }, - "execution_count": 14, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "pipeline._pipeline.get_tunable_hyperparameters()" + "pipeline.get_hyperparameters()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also see the obtained cross validation score by looking at the `cv_score` attribute of the\n", + "`pipeline` object:" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'pandas.DataFrame.resample#1': {'rule': '1D',\n", - " 'time_index': 'timestamp',\n", - " 'groupby': ['turbine_id', 'signal_id'],\n", - " 'aggregation': 'mean'},\n", - " 'pandas.DataFrame.unstack#1': {'level': 'signal_id', 'reset_index': True},\n", - " 'featuretools.EntitySet.entity_from_dataframe#1': {'entityset_id': 'entityset',\n", - " 'entity_id': 'readings',\n", - " 'index': 'index',\n", - " 'variable_types': None,\n", - " 'make_index': True,\n", - " 'time_index': 'timestamp',\n", - " 'secondary_time_index': None,\n", - " 'already_sorted': False},\n", - " 'featuretools.EntitySet.entity_from_dataframe#2': {'entityset_id': 'entityset',\n", - " 'entity_id': 'turbines',\n", - " 'index': 'turbine_id',\n", - " 'variable_types': None,\n", - " 'make_index': False,\n", - " 'time_index': None,\n", - " 'secondary_time_index': None,\n", - " 'already_sorted': False},\n", - " 'featuretools.EntitySet.entity_from_dataframe#3': {'entityset_id': 'entityset',\n", - " 'entity_id': 'signals',\n", - " 'index': 'signal_id',\n", - " 'variable_types': None,\n", - " 'make_index': False,\n", - " 'time_index': None,\n", - " 'secondary_time_index': None,\n", - " 'already_sorted': False},\n", - " 'featuretools.EntitySet.add_relationship#1': {'parent': 'turbines',\n", - " 'parent_column': 'turbine_id',\n", - " 'child': 'readings',\n", - " 'child_column': 'turbine_id'},\n", - " 'featuretools.dfs#1': {'target_entity': 'turbines',\n", - " 'index': 'turbine_id',\n", - " 'time_index': 'timestamp',\n", - " 'agg_primitives': None,\n", - " 'trans_primitives': None,\n", - " 'copy': False,\n", - " 'encode': False,\n", - " 'max_depth': 3,\n", - " 'remove_low_information': True},\n", - " 'mlprimitives.custom.feature_extraction.CategoricalEncoder#1': {'copy': True,\n", - " 'features': 'auto',\n", - " 'max_labels': 23},\n", - " 'sklearn.impute.SimpleImputer#1': {'missing_values': nan,\n", - " 'fill_value': None,\n", - " 'verbose': False,\n", - " 'copy': True,\n", - " 'strategy': 'constant'},\n", - " 'sklearn.preprocessing.StandardScaler#1': {'with_mean': True,\n", - " 'with_std': False},\n", - " 'xgboost.XGBClassifier#1': {'n_jobs': -1,\n", - " 
'n_estimators': 353,\n", - " 'max_depth': 4,\n", - " 'learning_rate': 0.6150792206840879,\n", - " 'gamma': 0.46831924909241274,\n", - " 'min_child_weight': 3}}" + "0.6431783902372138" ] }, - "execution_count": 12, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "pipeline.get_hyperparameters()" + "pipeline.cv_score" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "We can also see the obtained cross validation score by looking at the `score` attribute of the\n", - "`pipeline` object:" + "**NOTE**: If the score is not good enough, we can call the `tune` method again as many times\n", + "as needed and the pipeline will continue its tuning process every time based on the previous\n", + "results!" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2020-02-03 12:54:15,971 - INFO - pipeline - Scoring pipeline 1\n", + "2020-02-03 12:54:16,421 - INFO - pipeline - Pipeline 1 score: 0.6220467704338674\n", + "2020-02-03 12:54:16,423 - INFO - pipeline - Scoring pipeline 2\n", + "2020-02-03 12:54:16,795 - INFO - pipeline - Pipeline 2 score: 0.5867369345630215\n", + "2020-02-03 12:54:16,797 - INFO - pipeline - Scoring pipeline 3\n", + "2020-02-03 12:54:17,227 - INFO - pipeline - Pipeline 3 score: 0.6161616161616162\n", + "2020-02-03 12:54:17,229 - INFO - pipeline - Scoring pipeline 4\n", + "2020-02-03 12:54:17,725 - INFO - pipeline - Pipeline 4 score: 0.6037324896256047\n", + "2020-02-03 12:54:17,727 - INFO - pipeline - Scoring pipeline 5\n", + "2020-02-03 12:54:18,287 - INFO - pipeline - Pipeline 5 score: 0.6169717350045217\n", + "2020-02-03 12:54:18,288 - INFO - pipeline - Scoring pipeline 6\n", + "2020-02-03 12:54:18,744 - INFO - pipeline - Pipeline 6 score: 0.639102564102564\n", + "2020-02-03 12:54:18,746 - INFO - pipeline - Scoring pipeline 7\n", + "2020-02-03 12:54:19,171 - INFO - pipeline - Pipeline 7 score: 0.6724889262202695\n", + "2020-02-03 12:54:19,174 - INFO - pipeline - Scoring pipeline 8\n", + "2020-02-03 12:54:19,627 - INFO - pipeline - Pipeline 8 score: 0.628250663400694\n", + "2020-02-03 12:54:19,629 - INFO - pipeline - Scoring pipeline 9\n", + "2020-02-03 12:54:20,250 - INFO - pipeline - Pipeline 9 score: 0.656191724941725\n", + "2020-02-03 12:54:20,253 - INFO - pipeline - Scoring pipeline 10\n", + "2020-02-03 12:54:20,799 - INFO - pipeline - Pipeline 10 score: 0.639014073371284\n" + ] + } + ], + "source": [ + "pipeline.tune(target_times, readings, iterations=10)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.659349506225848" + "0.6724889262202695" ] }, - "execution_count": 13, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "pipeline.score" + "pipeline.cv_score" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": 21, "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'mlprimitives.custom.feature_extraction.CategoricalEncoder#1': {'max_labels': 35},\n", + " 'xgboost.XGBClassifier#1': {'n_estimators': 542,\n", + " 'max_depth': 9,\n", + " 'learning_rate': 0.8024814826871371,\n", + " 'gamma': 0.8891378840299992,\n", + " 'min_child_weight': 10}}" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "**NOTE**: If the score is not good enough, we can call the `tune` method again as many 
times\n", - "as needed and the pipeline will continue its tuning process every time based on the previous\n", - "results!" + "pipeline.get_hyperparameters()" ] }, { @@ -821,11 +752,20 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 22, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Built 165 features\n", + "Elapsed: 00:35 | Progress: 100%|██████████\n" + ] + } + ], "source": [ - "pipeline.fit(X_train, y_train, tables)" + "pipeline.fit(train, readings)" ] }, { @@ -839,11 +779,19 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 23, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Elapsed: 00:11 | Progress: 100%|██████████\n" + ] + } + ], "source": [ - "predictions = pipeline.predict(X_test, tables)" + "predictions = pipeline.predict(test, readings)" ] }, { @@ -855,24 +803,24 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.6413043478260869" + "0.7058823529411765" ] }, - "execution_count": 18, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "from sklearn.metrics import accuracy_score\n", + "from sklearn.metrics import f1_score\n", "\n", - "accuracy_score(y_test, predictions)" + "f1_score(test['target'], predictions)" ] }, { @@ -893,7 +841,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ @@ -912,7 +860,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ @@ -928,22 +876,29 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 27, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Elapsed: 00:11 | Progress: 100%|██████████\n" + ] + }, { "data": { "text/plain": [ - "array([1., 0., 0., 0., 0.])" + "array([0, 0, 0, 1, 0])" ] }, - "execution_count": 21, + "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "predictions = new_pipeline.predict(X_test, tables)\n", + "predictions = new_pipeline.predict(test, readings)\n", "predictions[0:5]" ] } @@ -964,7 +919,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.6.9" } }, "nbformat": 4, From a36a02446f1ae44fe5ef1cdf0d512b698f4d9e02 Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Fri, 7 Feb 2020 11:09:17 -0500 Subject: [PATCH 007/171] Update notebooks and demo loading --- greenguard/demo.py | 2 +- greenguard/demo.py.new | 66 -------- greenguard/pipeline.py | 11 +- notebooks/CSVLoader Demo.ipynb | 183 +++++++++++++++++++++-- notebooks/GreenGuard usage example.ipynb | 55 ++++--- 5 files changed, 216 insertions(+), 101 deletions(-) delete mode 100644 greenguard/demo.py.new diff --git a/greenguard/demo.py b/greenguard/demo.py index 789a50d..bae6a64 100644 --- a/greenguard/demo.py +++ b/greenguard/demo.py @@ -64,6 +64,6 @@ def generate_raw_readings(output_path='demo'): month_data['timestamp'] = month_data['timestamp'].dt.strftime('%m/%d/%y %M:%H:%S') month_path = os.path.join(turbine_path, '2013-{:02d}-.csv'.format(month)) LOGGER.info('Generating file %s', month_path) - month_data.to_csv(month_path, index=False) + month_data[['signal_id', 'timestamp', 'value']].to_csv(month_path, index=False) return target_times diff --git 
a/greenguard/demo.py.new b/greenguard/demo.py.new deleted file mode 100644 index 62a9eb1..0000000 --- a/greenguard/demo.py.new +++ /dev/null @@ -1,66 +0,0 @@ -import os -import random -from datetime import datetime, timedelta - -import pandas as pd - - -def get_turbine_df(start, end, interval, signals): - data = list() - current = start - delta = timedelta(seconds=interval) - while current < end: - for signal in signals: - data.append({ - 'timestamp': current.strftime('%m/%d/%y %H:%M:%S'), - 'signal_id': signal, - 'value': random.random() - }) - - current = current + delta - - return pd.DataFrame(data)[['timestamp', 'signal_id', 'value']] - - -def generate_turbine_files(data_path, turbine_name, signals, interval): - turbine_path = os.path.join(data_path, turbine_name) - os.makedirs(turbine_path, exist_ok=True) - - for year in range(2000, 2011): - for month in range(1, 13): - start = datetime(year, month, 1) - end = datetime(year + (1 if month == 12 else 0), (month % 12) + 1, 1) - tdf = get_turbine_df(start, end, interval, signals) - - csv_path = os.path.join(turbine_path, '{}-{:02d}-.csv'.format(year, month)) - tdf.to_csv(csv_path) - -def _prefixed_range(prefix, size): - arr = pd.Series(np.arange(size) + 1).astype(str) - arr = arr.str.zfill(arr.str.len().max()) - - return prefix + arr - - -def make_demo(path='.', signals=1, turbines=1, interval=600): - signals = _prefixed_range('S', signals) - turbines = _prefixed_range('S', turbines) - readings_path = os.path.join(path, readings) - - for turbine in turbines: - generate_turbine_files(readings_path, turbine, signals, interval) - - - target_times = pd.DataFrame([ - {'turbine_id': 'T001', 'cutoff_time': datetime(2005, 1, 1), 'target': False}, - {'turbine_id': 'T001', 'cutoff_time': datetime(2007, 1, 2), 'target': True}, - {'turbine_id': 'T001', 'cutoff_time': datetime(2009, 1, 2), 'target': False}, - {'turbine_id': 'T002', 'cutoff_time': datetime(2005, 1, 1), 'target': True}, - {'turbine_id': 'T002', 'cutoff_time': datetime(2007, 1, 2), 'target': False}, - {'turbine_id': 'T002', 'cutoff_time': datetime(2009, 1, 2), 'target': True}, - {'turbine_id': 'T003', 'cutoff_time': datetime(2005, 1, 1), 'target': False}, - {'turbine_id': 'T003', 'cutoff_time': datetime(2007, 1, 2), 'target': True}, - {'turbine_id': 'T003', 'cutoff_time': datetime(2009, 1, 2), 'target': False}, - ]) - - target_times.to_csv('target_times.csv', index=False) diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py index 9783052..dd70e8a 100644 --- a/greenguard/pipeline.py +++ b/greenguard/pipeline.py @@ -22,7 +22,7 @@ PIPELINES_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), 'pipelines')) -def get_pipelines(pattern='', path=False): +def get_pipelines(pattern='', path=False, unstacked=False): """Get the list of available pipelines. Optionally filter the names using a patter or obtain @@ -34,6 +34,9 @@ def get_pipelines(pattern='', path=False): path (bool): Whether to return a dictionary containing the pipeline paths instead of only a list with the names. + unstacked (bool): + Whether to load the pipelines that expect the readings + to be already unstacked by signal_id. Defaults to ``False``. Return: list or dict: @@ -42,7 +45,11 @@ def get_pipelines(pattern='', path=False): names as keys and their absolute paths as values. 
""" pipelines = dict() - for filename in os.listdir(PIPELINES_DIR): + pipelines_dir = PIPELINES_DIR + if unstacked: + pipelines_dir = os.path.join(pipelines_dir, 'unstacked') + + for filename in os.listdir(pipelines_dir): if filename.endswith('.json') and pattern in filename: name = os.path.basename(filename)[:-len('.json')] pipeline_path = os.path.join(PIPELINES_DIR, filename) diff --git a/notebooks/CSVLoader Demo.ipynb b/notebooks/CSVLoader Demo.ipynb index 66dab0b..4710596 100644 --- a/notebooks/CSVLoader Demo.ipynb +++ b/notebooks/CSVLoader Demo.ipynb @@ -1,8 +1,39 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# CSVLoader Demo\n", + "\n", + "This notebook shows how to use the CSVLoader class to load readings from a folder\n", + "containing readings in the raw format.\n", + "\n", + "Details about the raw readings format can be found in the documentation site.\n", + "\n", + "In this notebook we will:\n", + "\n", + "- Generate a folder with readings in the raw format based on the demo data\n", + "- Load the redings needed for our target times\n", + "- Explore different options from the CSVLoader\n", + "- Load a pipeline and use it on the loaded data\n", + "- Load the readings in the unstacked format\n", + "- Load an unstacked pipeline and use it on the loaded data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 0. Setup the logging\n", + "\n", + "This step sets up logging in our environment to increase our visibility over\n", + "the steps that GreenGuard performs." + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -15,9 +46,19 @@ "warnings.simplefilter(\"ignore\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Generate Raw Readings\n", + "\n", + "The first step will be to execute the `generate_raw_readings` function, which will create a\n", + "folder in the indicated path and populate it with the raw version of the demo readings." 
+ ] + }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": { "scrolled": true }, @@ -26,25 +67,137 @@ "name": "stderr", "output_type": "stream", "text": [ - "2020-02-03 12:49:43,764 - INFO - demo - Generating file raw_demo/T001/2013-01-.csv\n", - "2020-02-03 12:49:44,277 - INFO - demo - Generating file raw_demo/T001/2013-02-.csv\n", - "2020-02-03 12:49:44,810 - INFO - demo - Generating file raw_demo/T001/2013-03-.csv\n", - "2020-02-03 12:49:45,345 - INFO - demo - Generating file raw_demo/T001/2013-04-.csv\n", - "2020-02-03 12:49:45,885 - INFO - demo - Generating file raw_demo/T001/2013-05-.csv\n", - "2020-02-03 12:49:46,417 - INFO - demo - Generating file raw_demo/T001/2013-06-.csv\n", - "2020-02-03 12:49:46,954 - INFO - demo - Generating file raw_demo/T001/2013-07-.csv\n", - "2020-02-03 12:49:47,492 - INFO - demo - Generating file raw_demo/T001/2013-08-.csv\n", - "2020-02-03 12:49:48,017 - INFO - demo - Generating file raw_demo/T001/2013-09-.csv\n", - "2020-02-03 12:49:48,543 - INFO - demo - Generating file raw_demo/T001/2013-10-.csv\n", - "2020-02-03 12:49:49,094 - INFO - demo - Generating file raw_demo/T001/2013-11-.csv\n", - "2020-02-03 12:49:49,606 - INFO - demo - Generating file raw_demo/T001/2013-12-.csv\n" + "2020-02-03 14:26:17,008 - INFO - demo - Generating file raw_readings/T001/2013-01-.csv\n", + "2020-02-03 14:26:17,518 - INFO - demo - Generating file raw_readings/T001/2013-02-.csv\n", + "2020-02-03 14:26:18,045 - INFO - demo - Generating file raw_readings/T001/2013-03-.csv\n", + "2020-02-03 14:26:18,580 - INFO - demo - Generating file raw_readings/T001/2013-04-.csv\n", + "2020-02-03 14:26:19,118 - INFO - demo - Generating file raw_readings/T001/2013-05-.csv\n", + "2020-02-03 14:26:19,668 - INFO - demo - Generating file raw_readings/T001/2013-06-.csv\n", + "2020-02-03 14:26:20,219 - INFO - demo - Generating file raw_readings/T001/2013-07-.csv\n", + "2020-02-03 14:26:20,753 - INFO - demo - Generating file raw_readings/T001/2013-08-.csv\n", + "2020-02-03 14:26:21,304 - INFO - demo - Generating file raw_readings/T001/2013-09-.csv\n", + "2020-02-03 14:26:21,852 - INFO - demo - Generating file raw_readings/T001/2013-10-.csv\n", + "2020-02-03 14:26:22,388 - INFO - demo - Generating file raw_readings/T001/2013-11-.csv\n", + "2020-02-03 14:26:22,931 - INFO - demo - Generating file raw_readings/T001/2013-12-.csv\n" ] } ], "source": [ "from greenguard.demo import generate_raw_readings\n", "\n", - "target_times = generate_raw_readings('raw_demo')" + "readings_path = 'raw_readings'\n", + "\n", + "target_times = generate_raw_readings(readings_path)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This function will generate a set of reading files in the raw format.\n", + "\n", + "We will load one of them to explore it:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "readings_sample = pd.read_csv('raw_readings/T001/2013-01-.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
" + ], + "text/plain": [ + " turbine_id signal_id timestamp value\n", + "0 T001 S01 01/10/13 00:00:00 323.0\n", + "1 T001 S02 01/10/13 00:00:00 320.0\n", + "2 T001 S03 01/10/13 00:00:00 284.0\n", + "3 T001 S04 01/10/13 00:00:00 348.0\n", + "4 T001 S05 01/10/13 00:00:00 273.0" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "readings_sample.head()" ] }, { diff --git a/notebooks/GreenGuard usage example.ipynb b/notebooks/GreenGuard usage example.ipynb index e912d19..c3179c0 100644 --- a/notebooks/GreenGuard usage example.ipynb +++ b/notebooks/GreenGuard usage example.ipynb @@ -11,7 +11,24 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This notebook shows how to use GreenGuard to fit a pipeline and later on use it to make predictions on new data and evaluate the pipeline performance." + "This notebook shows how to use GreenGuard to:\n", + "\n", + "- Load some demo data\n", + "- Find available pipelines and load one as a template\n", + "- Tune the template arguments to generate the optimal pipeline\n", + "- Fit the pipeline to our data\n", + "- Make predictions using the pipeline\n", + "- Evaluate the goodness-of-fit" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 0. Setup the logging\n", + "\n", + "This step sets up logging in our environment to increase our visibility over\n", + "the steps that GreenGuard performs." ] }, { @@ -37,8 +54,7 @@ "\n", "The first step is to load the data that we are going to use.\n", "\n", - "In order to use the demo data included in GreenGuard, the `greenguard.load_demo`\n", - "function can be used." + "In order to use the demo data included in GreenGuard, the `greenguard.demo.load_demo` function can be used." ] }, { @@ -57,7 +73,9 @@ "metadata": {}, "source": [ "This will download some demo data from our S3 demo Bucket and load it as\n", - "the necessary `target_times` and `readings` tables." + "the necessary `target_times` and `readings` tables.\n", + "\n", + "The exact format of these tables is described in the GreenGuard README and docs:" ] }, { @@ -335,19 +353,14 @@ "Alternatively, if you want to load your own dataset, all you have to do is load the\n", "`target_times` and `readings` tables as `pandas.DataFrame` objects.\n", "\n", - "Make sure to parse the corresponding datetime fields!" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "#import pandas as pd\n", + "Make sure to parse the corresponding datetime fields!\n", + "\n", + "```python\n", + "import pandas as pd\n", "\n", - "#target_times = pd.read_csv('path/to/your/target_times.csv', parse_dates=['cutoff_time'])\n", - "#readings = pd.read_csv('path/to/your/readings.csv', parse_dates=['timestamp'])" + "target_times = pd.read_csv('path/to/your/target_times.csv', parse_dates=['cutoff_time'])\n", + "readings = pd.read_csv('path/to/your/readings.csv', parse_dates=['timestamp'])\n", + "```" ] }, { @@ -481,7 +494,15 @@ "metadata": {}, "source": [ "For the rest of this tutorial, we will select and use the pipeline\n", - "`resample_600s_unstack_normalize_dfs_1d_xgb_classifier` as our template." 
+ "`resample_600s_unstack_normalize_dfs_1d_xgb_classifier` as our template.\n", + "\n", + "This templates contains the following steps:\n", + "\n", + "- Resample the data using a 10 minute average aggregation\n", + "- Unstack the data by signal, so each signal is in a different column\n", + "- Normalize the Turbine IDs into a new table to assist DFS aggregations\n", + "- Use DFS on the readings based on the target_times cutoff times using a 1d window size\n", + "- Apply an XGBoost Classifier" ] }, { From 1b2cd20d083092e9c0b62f3fefb3f9272d2102a0 Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Fri, 7 Feb 2020 11:12:03 -0500 Subject: [PATCH 008/171] Remove unused import --- greenguard/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/greenguard/__init__.py b/greenguard/__init__.py index c530d4e..1d98a94 100644 --- a/greenguard/__init__.py +++ b/greenguard/__init__.py @@ -8,7 +8,6 @@ import os -from greenguard.demo import load_demo from greenguard.pipeline import GreenGuardPipeline, get_pipelines _BASE_PATH = os.path.abspath(os.path.dirname(__file__)) From daa5276ee13c8eeb6650612e82aa818abdbfbc4a Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Fri, 7 Feb 2020 11:19:29 -0500 Subject: [PATCH 009/171] re-enable py35 --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 5893f14..ab0d545 100644 --- a/setup.py +++ b/setup.py @@ -76,6 +76,7 @@ 'License :: OSI Approved :: MIT License', 'Natural Language :: English', 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', ], @@ -97,7 +98,7 @@ long_description_content_type='text/markdown', name='greenguard', packages=find_packages(include=['greenguard', 'greenguard.*']), - python_requires='>=3.6', + python_requires='>=3.5', setup_requires=setup_requires, test_suite='tests', tests_require=tests_require, From e9b4b56e168c60602e534b236fb1c14939265fda Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Mon, 10 Feb 2020 19:56:01 -0500 Subject: [PATCH 010/171] Add tutorials and documentation --- .gitignore | 1 + README.md | 19 +- docs/advanced_usage/concepts.md | 56 + docs/advanced_usage/csv.md | 54 + docs/advanced_usage/docker.md | 107 ++ docs/index.rst | 8 + greenguard/loaders/csv.py | 2 +- greenguard/pipeline.py | 34 +- ...e.ipynb => 1. GreenGuard Quickstart.ipynb} | 7 +- notebooks/2. Extract Readings.ipynb | 1214 +++++++++++++++++ notebooks/CSVLoader Demo.ipynb | 836 ------------ 11 files changed, 1484 insertions(+), 854 deletions(-) create mode 100644 docs/advanced_usage/concepts.md create mode 100644 docs/advanced_usage/csv.md create mode 100644 docs/advanced_usage/docker.md rename notebooks/{GreenGuard usage example.ipynb => 1. GreenGuard Quickstart.ipynb} (99%) create mode 100644 notebooks/2. 
Extract Readings.ipynb delete mode 100644 notebooks/CSVLoader Demo.ipynb diff --git a/.gitignore b/.gitignore index 1184ed0..fc59bb2 100644 --- a/.gitignore +++ b/.gitignore @@ -106,6 +106,7 @@ ENV/ .*.swp greenguard/demo/ +notebooks/ notebooks-private/ scripts/ dask-worker-space/ diff --git a/README.md b/README.md index cc0a5e0..361b3bb 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,7 @@ If you want to install from source or contribute to the project please read the # Data Format -The input expected by the **GreenGuard** system consists of the following two elements, +The minimum input expected by the **GreenGuard** system consists of the following two elements, which need to be passed as `pandas.DataFrame` objects: ## Target Times @@ -109,13 +109,24 @@ A table containing the signal data from the different sensors, with the followin | 10 | T1 | S2 | 2001-01-03 00:00:00 | 11 | | 11 | T1 | S2 | 2001-01-03 12:00:00 | 12 | +## Turbines + +Optionally, a third table can be added containing metadata about the turbines. +The only requirement for this table is to have a `turbine_id` field, and it can have +an arbitraty number of additional fields. + +| | turbine_id | manufacturer | ... | ... | ... | +|----|--------------|----------------|-------|-------|-------| +| 0 | T1 | Siemens | ... | ... | ... | +| 1 | T2 | Siemens | ... | ... | ... | + ## CSV Format A part from the in-memory data format explained above, which is limited by the memory allocation capabilities of the system where it is run, **GreenGuard** is also prepared to load and work with data stored as a collection of CSV files, drastically increasing the amount of data which it can work with. Further details about this format can be found in the -[project documentation site](https://D3-AI.github.io/GreenGuard/). +[project documentation site](https://d3-ai.github.io/GreenGuard/advanced_usage/csv.html). # Quickstart @@ -260,4 +271,6 @@ f1_score(test_targets, predictions) ## What's next? For more details about **GreenGuard** and all its possibilities and features, please check the -[project documentation site](https://D3-AI.github.io/GreenGuard/)! +[project documentation site](https://D3-AI.github.io/GreenGuard/) +Also do not forget to have a look at the [notebook tutorials]( +https://github.com/D3-AI/GreenGuard/tree/master/notebooks)! diff --git a/docs/advanced_usage/concepts.md b/docs/advanced_usage/concepts.md new file mode 100644 index 0000000..302d34a --- /dev/null +++ b/docs/advanced_usage/concepts.md @@ -0,0 +1,56 @@ +# Concepts + +Here we briefly explain some of the concepts and terminology used within the GreenGuard +project and documentation. + +## Primitive + +We call the smallest computational blocks used in a Machine Learning process +**primitives**, which: + +* Can be either classes or functions. +* Have some initialization arguments, which MLBlocks calls `init_params`. +* Have some tunable hyperparameters, which have types and a list or range of valid values. + +## Template + +Primitives can be combined to form what we call **Templates**, which: + +* Have a list of primitives. +* Have some initialization arguments, which correspond to the initialization arguments + of their primitives. +* Have some tunable hyperparameters, which correspond to the tunable hyperparameters + of their primitives. + +## Pipeline + +Templates can be used to build **Pipelines** by taking and fixing a set of valid +hyperparameters for a Template. 
Hence, Pipelines: + +* Have a list of primitives, which corresponds to the list of primitives of their template. +* Have some initialization arguments, which correspond to the initialization arguments + of their template. +* Have some hyperparameter values, which fall within the ranges of valid tunable + hyperparameters of their template. + +A pipeline can be fitted and evaluated directly using [MLBlocks]( +https://hdi-project.github.io/MLBlocks), or using the **GreenGuardPipeline**. + +## Tuning + +We call tuning the process of, given a dataset and a template, finding the pipeline derived from +the template that gets the best possible score on the dataset. + +This process usually involves fitting and evaluating multiple pipelines with different +hyperparameter configurations on the same data while using optimization algorithms to deduce +which hyperparameters are more likely to get the best results in the next iterations. + +We call each one of these evaluations a **tuning iteration**. + +## GreenGuardPipeline + +This class is the one in charge of loading the **MLBlocks Pipelines** configured in the +system and use them to learn from the data and make predictions. + +This class is also responsible for tuning the pipeline hyperparameters using [BTB]( +https://hdi-project.github.io/BTB/) diff --git a/docs/advanced_usage/csv.md b/docs/advanced_usage/csv.md new file mode 100644 index 0000000..c020832 --- /dev/null +++ b/docs/advanced_usage/csv.md @@ -0,0 +1,54 @@ +# CSV Format + +As explained in a previous section, the input expected by the **GreenGuard** system consists of +two tables which need to be passed as `pandas.DataFrame` objects: + +* The `target_times` table, which containing the specification of the problem that we are solving + in the form of training examples with a `turbine_id`, a `cutoff_time` and a `target` value. +* The `readings` table, which contains the signal readings from the different sensors, with + `turbine_id`, `signal_id`, `timestamp` and `value` fields. + +However, in most scenarios the size of the available will far exceed the memory limitations +of the system on which **GreenGuard** is being run, so loading all the data in a single +`pandas.DataFrame` will not be possible. + +In order to solve this situation, **GreenGuard** provides a [CSVLoader]( +https://d3-ai.github.io/GreenGuard/api/greenguard.loaders.csv.html#greenguard.loaders.csv.CSVLoader) +class which can be used to load data from what we call the **Raw Data Format**. + +## Raw Data Format + +The **Raw Data Format** consists on a collection of CSV files stored in a single folder with the +following structure: + +* All the data from all the turbines is inside a single folder, which here we will call `readings`. +* Inside the `readings` folder, one folder exists for each turbine, named exactly like the turbine: + * `readings/T001` + * `readings/T002` + * ... +* Inside each turbine folder one CSV file exists for each month, named `%Y-%m-.csv`. + * `readings/T001/2010-01-.csv` + * `readings/T001/2010-02-.csv` + * `readings/T001/2010-03-.csv` + * ... +* Each CSV file contains three columns: + * `signal_id`: name or id of the signal. + * ``timestamp``: timestamp of the reading formatted as ``%m/%d/%y %H:%M:%S``. + * `value`: value of the reading. 
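+
+A folder that follows this convention can be loaded with the `CSVLoader` class
+mentioned above. Here is a minimal sketch of the usage (assuming that the folder
+is called `readings` and that a `target_times` table has already been loaded as
+a `pandas.DataFrame`):
+
+```python
+from greenguard.loaders import CSVLoader
+
+# Point the loader at the folder that contains one folder per turbine
+csv_loader = CSVLoader('readings')
+
+# Keep only the readings inside a 1 day window before each cutoff time
+target_times, readings = csv_loader.load(target_times, '1d')
+```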
+ +This is an example of what a CSV contents look like: + +| | signal_id | timestamp | value | +|----|-------------|-------------------|---------| +| 0 | S1 | 01/01/01 00:00:00 | 1 | +| 1 | S1 | 01/01/01 12:00:00 | 2 | +| 2 | S1 | 01/02/01 00:00:00 | 3 | +| 3 | S1 | 01/02/01 12:00:00 | 4 | +| 4 | S1 | 01/03/01 00:00:00 | 5 | +| 5 | S1 | 01/03/01 12:00:00 | 6 | +| 6 | S2 | 01/01/01 00:00:00 | 7 | +| 7 | S2 | 01/01/01 12:00:00 | 8 | +| 8 | S2 | 01/02/01 00:00:00 | 9 | +| 9 | S2 | 01/02/01 12:00:00 | 10 | +| 10 | S2 | 01/03/01 00:00:00 | 11 | +| 11 | S2 | 01/03/01 12:00:00 | 12 | diff --git a/docs/advanced_usage/docker.md b/docs/advanced_usage/docker.md new file mode 100644 index 0000000..e5603df --- /dev/null +++ b/docs/advanced_usage/docker.md @@ -0,0 +1,107 @@ +# Docker Usage + +**GreenGuard** comes configured and ready to be distributed and run as a docker image which starts +a jupyter notebook already configured to use greenguard, with all the required dependencies already +installed. + +## Requirements + +The only requirement in order to run the GreenGuard Docker image is to have Docker installed and +that the user has enough permissions to run it. + +Installation instructions for any possible system compatible can be found [here](https://docs.docker.com/install/) + +Additionally, the system that builds the GreenGuard Docker image will also need to have a working +internet connection that allows downloading the base image and the additional python depenedencies. + +## Building the GreenGuard Docker Image + +After having cloned the **GreenGuard** repository, all you have to do in order to build the GreenGuard Docker +Image is running this command: + +```bash +make docker-jupyter-build +``` + +After a few minutes, the new image, called `greenguard-jupyter`, will have been built into the system +and will be ready to be used or distributed. + +## Distributing the GreenGuard Docker Image + +Once the `greenguard-jupyter` image is built, it can be distributed in several ways. + +### Distributing using a Docker registry + +The simplest way to distribute the recently created image is [using a registry](https://docs.docker.com/registry/). + +In order to do so, we will need to have write access to a public or private registry (remember to +[login](https://docs.docker.com/engine/reference/commandline/login/)!) and execute these commands: + +```bash +docker tag greenguard-jupyter:latest your-registry-name:some-tag +docker push your-registry-name:some-tag +``` + +Afterwards, in the receiving machine: + +```bash +docker pull your-registry-name:some-tag +docker tag your-registry-name:some-tag greenguard-jupyter:latest +``` + +### Distributing as a file + +If the distribution of the image has to be done offline for any reason, it can be achieved +using the following command. + +In the system that already has the image: + +```bash +docker save --output greenguard-jupyter.tar greenguard-jupyter +``` + +Then copy over the file `greenguard-jupyter.tar` to the new system and there, run: + +```bash +docker load --input greenguard-jupyter.tar +``` + +After these commands, the `greenguard-jupyter` image should be available and ready to be used in the +new system. + + +## Running the greenguard-jupyter image + +Once the `greenguard-jupyter` image has been built, pulled or loaded, it is ready to be run. 
+ +This can be done in two ways: + +### Running greenguard-jupyter with the code + +If the GreenGuard source code is available in the system, running the image is as simple as running +this command from within the root of the project: + +```bash +make docker-jupyter-run +``` + +This will start a jupyter notebook using the docker image, which you can access by pointing your +browser at http://127.0.0.1:8888 + +In this case, the local version of the project will also mounted within the Docker container, +which means that any changes that you do in your local code will immediately be available +within your notebooks, and that any notebook that you create within jupyter will also show +up in your `notebooks` folder! + +### Running greenguard-jupyter without the greenguard code + +If the GreenGuard source code is not available in the system and only the Docker Image is, you can +still run the image by using this command: + +```bash +docker run -ti -p8888:8888 greenguard-jupyter +``` + +In this case, the code changes and the notebooks that you create within jupyter will stay +inside the container and you will only be able to access and download them through the +jupyter interface. diff --git a/docs/index.rst b/docs/index.rst index ab088e6..a654f0e 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -6,6 +6,14 @@ Overview +.. toctree:: + :caption: Advanced Usage + :hidden: + + advanced_usage/concepts + advanced_usage/csv + advanced_usage/docker + .. toctree:: :caption: Resources :hidden: diff --git a/greenguard/loaders/csv.py b/greenguard/loaders/csv.py index a2db438..5fec885 100644 --- a/greenguard/loaders/csv.py +++ b/greenguard/loaders/csv.py @@ -103,7 +103,7 @@ def __consolidate(self, readings, turbine_id): signals = readings[readings['value'].str.isnumeric()].signal_id.unique() raise ValueError('Signals contain non-numerical values: {}'.format(signals)) - readings['turbine_id'] = turbine_id + readings.insert(0, 'turbine_id', turbine_id) LOGGER.info('Loaded %s readings from turbine %s', len(readings), turbine_id) diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py index dd70e8a..7437a4a 100644 --- a/greenguard/pipeline.py +++ b/greenguard/pipeline.py @@ -268,16 +268,18 @@ def _is_better(self, score): return score > self.cv_score - def _generate_splits(self, X, y, readings): + def _generate_splits(self, X, y, readings, turbines=None): if self._preprocessing: pipeline = MLPipeline(self.template) LOGGER.debug('Running %s preprocessing steps', self._preprocessing) - context = pipeline.fit(X=X, y=y, readings=readings, output_=self._preprocessing - 1) + context = pipeline.fit(X=X, y=y, readings=readings, + turbines=turbines, output_=self._preprocessing - 1) del context['X'] del context['y'] else: context = { - 'readings': readings + 'readings': readings, + 'turbines': turbines, } splits = list() @@ -296,7 +298,7 @@ def _generate_splits(self, X, y, readings): return splits - def cross_validate(self, X=None, y=None, readings=None, params=None): + def cross_validate(self, X=None, y=None, readings=None, turbines=None, params=None): """Compute cross validation score using the given data. If the splits have not been previously computed, compute them now. @@ -317,6 +319,9 @@ def cross_validate(self, X=None, y=None, readings=None, params=None): readings (pandas.DataFrame): ``readings`` table. Only needed if the splits have not been previously computed. + turbines (pandas.DataFrame): + ``turbines`` table. Only needed if the splits have not been + previously computed. 
params (dict): hyperparameter values to use. @@ -328,7 +333,7 @@ def cross_validate(self, X=None, y=None, readings=None, params=None): if self._splits is None: LOGGER.info('Running static steps before cross validation') - self._splits = self._generate_splits(X, y, readings) + self._splits = self._generate_splits(X, y, readings, turbines) scores = [] for fold, pipeline, fit, predict, y_test in self._splits: @@ -412,7 +417,7 @@ def _get_tuner(self): return tuner - def tune(self, target_times=None, readings=None, iterations=10): + def tune(self, target_times=None, readings=None, turbines=None, iterations=10): """Tune this pipeline for the indicated number of iterations. Args: @@ -423,6 +428,9 @@ def tune(self, target_times=None, readings=None, iterations=10): readings (pandas.DataFrame): ``readings`` table. Only needed if the splits have not been previously computed. + turbines (pandas.DataFrame): + ``turbines`` table. Only needed if the splits have not been + previously computed. iterations (int): Number of iterations to perform. """ @@ -430,7 +438,7 @@ def tune(self, target_times=None, readings=None, iterations=10): LOGGER.info('Scoring the default pipeline') X = target_times[['turbine_id', 'cutoff_time']] y = target_times['target'] - self.cv_score = self.cross_validate(X, y, readings) + self.cv_score = self.cross_validate(X, y, readings, turbines) LOGGER.info('Default Pipeline score: %s', self.cv_score) @@ -458,7 +466,7 @@ def tune(self, target_times=None, readings=None, iterations=10): LOGGER.exception("Caught an exception scoring pipeline %s with params:\n%s", i + 1, failed) - def fit(self, target_times, readings): + def fit(self, target_times, readings, turbines=None): """Fit this pipeline to the given data. Args: @@ -467,13 +475,15 @@ def fit(self, target_times, readings): and ``target`` columns. readings (pandas.DataFrame): ``readings`` table. + turbines (pandas.DataFrame): + ``turbines`` table. """ X = target_times[['turbine_id', 'cutoff_time']] y = target_times['target'] - self._pipeline.fit(X, y, readings=readings) + self._pipeline.fit(X, y, readings=readings, turbines=turbines) self.fitted = True - def predict(self, target_times, readings): + def predict(self, target_times, readings, turbines=None): """Make predictions using this pipeline. Args: @@ -482,6 +492,8 @@ def predict(self, target_times, readings): and ``target`` columns. readings (pandas.DataFrame): ``readings`` table. + turbines (pandas.DataFrame): + ``turbines`` table. Returns: numpy.ndarray: @@ -491,7 +503,7 @@ def predict(self, target_times, readings): raise NotFittedError() X = target_times[['turbine_id', 'cutoff_time']] - return self._pipeline.predict(X, readings=readings) + return self._pipeline.predict(X, readings=readings, turbines=turbines) def save(self, path): """Serialize and save this pipeline using cloudpickle. diff --git a/notebooks/GreenGuard usage example.ipynb b/notebooks/1. GreenGuard Quickstart.ipynb similarity index 99% rename from notebooks/GreenGuard usage example.ipynb rename to notebooks/1. GreenGuard Quickstart.ipynb index c3179c0..a32b494 100644 --- a/notebooks/GreenGuard usage example.ipynb +++ b/notebooks/1. 
GreenGuard Quickstart.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# GreenGuard usage example" + "# GreenGuard Quickstart" ] }, { @@ -13,7 +13,7 @@ "source": [ "This notebook shows how to use GreenGuard to:\n", "\n", - "- Load some demo data\n", + "- Load demo data\n", "- Find available pipelines and load one as a template\n", "- Tune the template arguments to generate the optimal pipeline\n", "- Fit the pipeline to our data\n", @@ -72,7 +72,8 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This will download some demo data from our S3 demo Bucket and load it as\n", + "This will download some demo data from [GreenGuard S3 demo Bucket](\n", + "https://d3-ai-greenguard.s3.amazonaws.com/index.html)%20and%20load%20it%20as/n", "the necessary `target_times` and `readings` tables.\n", "\n", "The exact format of these tables is described in the GreenGuard README and docs:" diff --git a/notebooks/2. Extract Readings.ipynb b/notebooks/2. Extract Readings.ipynb new file mode 100644 index 0000000..14b4cab --- /dev/null +++ b/notebooks/2. Extract Readings.ipynb @@ -0,0 +1,1214 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Extract Readings\n", + "\n", + "This notebook shows how to use the CSVLoader class to load readings from a folder\n", + "containing readings in the raw format.\n", + "\n", + "Details about the raw readings format can be found in the documentation site.\n", + "\n", + "In this notebook we will:\n", + "\n", + "- Generate a folder with readings in the raw format based on the demo data\n", + "- Load the redings needed for our target times\n", + "- Explore different options from the CSVLoader\n", + "- Load a pipeline and use it on the loaded data\n", + "- Load the readings in the unstacked format\n", + "- Load an unstacked pipeline and use it on the loaded data" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 0. Setup the logging\n", + "\n", + "This step sets up logging in our environment to increase our visibility over\n", + "the steps that GreenGuard performs." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import logging;\n", + "\n", + "logging.basicConfig(level=logging.INFO)\n", + "logging.getLogger().setLevel(level=logging.INFO)\n", + "\n", + "import warnings\n", + "warnings.simplefilter(\"ignore\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Generate Raw Readings\n", + "\n", + "The first step will be to execute the `generate_raw_readings` function, which will create a\n", + "folder in the indicated path and populate it with the raw version of the demo readings.\n", + "\n", + "**NOTE**: if you want to use your own dataset you can skip this step and go directly to step 2." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2020-02-10 18:41:33,310 - INFO - demo - Generating file readings/T001/2013-01-.csv\n", + "2020-02-10 18:41:34,048 - INFO - demo - Generating file readings/T001/2013-02-.csv\n", + "2020-02-10 18:41:34,845 - INFO - demo - Generating file readings/T001/2013-03-.csv\n", + "2020-02-10 18:41:35,670 - INFO - demo - Generating file readings/T001/2013-04-.csv\n", + "2020-02-10 18:41:36,476 - INFO - demo - Generating file readings/T001/2013-05-.csv\n", + "2020-02-10 18:41:37,259 - INFO - demo - Generating file readings/T001/2013-06-.csv\n", + "2020-02-10 18:41:38,194 - INFO - demo - Generating file readings/T001/2013-07-.csv\n", + "2020-02-10 18:41:39,031 - INFO - demo - Generating file readings/T001/2013-08-.csv\n", + "2020-02-10 18:41:39,891 - INFO - demo - Generating file readings/T001/2013-09-.csv\n", + "2020-02-10 18:41:40,689 - INFO - demo - Generating file readings/T001/2013-10-.csv\n", + "2020-02-10 18:41:41,478 - INFO - demo - Generating file readings/T001/2013-11-.csv\n", + "2020-02-10 18:41:42,249 - INFO - demo - Generating file readings/T001/2013-12-.csv\n" + ] + } + ], + "source": [ + "from greenguard.demo import generate_raw_readings\n", + "\n", + "target_times = generate_raw_readings('readings')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This function will generate a set of reading files in the raw format.\n", + "\n", + "We will load one of them to explore it:" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Readings Format" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "\n", + "readings_sample = pd.read_csv('readings/T001/2013-01-.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
"
+      ],
+      "text/plain": [
+       "  signal_id          timestamp  value\n",
+       "0       S01  01/10/13 00:00:00  323.0\n",
+       "1       S02  01/10/13 00:00:00  320.0\n",
+       "2       S03  01/10/13 00:00:00  284.0\n",
+       "3       S04  01/10/13 00:00:00  348.0\n",
+       "4       S05  01/10/13 00:00:00  273.0"
+      ]
+     },
+     "execution_count": 4,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "readings_sample.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Here we can clearly see the format in which the data is stored:\n",
+    "\n",
+    "* All the data from all the turbines is inside a single folder.\n",
+    "* Inside this folder, one folder exists for each turbine, named exactly like the turbine:\n",
+    "  * `readings/T001`\n",
+    "  * `readings/T002`\n",
+    "  * ...\n",
+    "* Inside each turbine folder one CSV file exists for each month, named `%Y-%m-.csv`.\n",
+    "  * `readings/T001/2010-01-.csv`\n",
+    "  * `readings/T001/2010-02-.csv`\n",
+    "  * `readings/T001/2010-03-.csv`\n",
+    "  * ...\n",
+    "* Each CSV file contains three columns:\n",
+    "  * `signal_id`: name or id of the signal.\n",
+    "  * `timestamp`: timestamp of the reading formatted as `%m/%d/%y %H:%M:%S`.\n",
+    "  * `value`: value of the reading."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Target Times"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The previous function also returned a `target_times` variable,\n",
+    "which is a `pandas.DataFrame` with the three expected columns:\n",
+    "\n",
+    "* `turbine_id`\n",
+    "* `cutoff_time`\n",
+    "* `target`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(353, 3)"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "target_times.shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "
"
+      ],
+      "text/plain": [
+       "  turbine_id cutoff_time  target\n",
+       "0       T001  2013-01-12       0\n",
+       "1       T001  2013-01-13       0\n",
+       "2       T001  2013-01-14       0\n",
+       "3       T001  2013-01-15       1\n",
+       "4       T001  2013-01-16       0"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "target_times.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.3002832861189802"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "target_times.target.mean()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "turbine_id             object\n",
+       "cutoff_time    datetime64[ns]\n",
+       "target                  int64\n",
+       "dtype: object"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "target_times.dtypes"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. CSVLoader\n",
+    "\n",
+    "The readings in raw format can be arbitrarily big, which might make it impossible to load\n",
+    "them into memory all at once.\n",
+    "\n",
+    "In order to load them in an efficient way that allows us to solve Machine Learning problems\n",
+    "using them, GreenGuard provides the `greenguard.loaders.CSVLoader` class.\n",
+    "\n",
+    "This class is prepared to, given a target times table, explore a collection of raw readings\n",
+    "and extract only the information needed to solve the corresponding problem.\n",
+    "\n",
+    "The first step in order to use it is to create an instance, passing it the path\n",
+    "to where the reading files are stored.\n",
+    "\n",
+    "**NOTE**: If you want to use your own dataset instead of the demo version,\n",
+    "all you have to do is make the `readings_path` variable point at the\n",
+    "folder where you have your CSV files stored and load your `target_times` table:\n",
+    "\n",
+    "Make sure to parse the `cutoff_time` column!\n",
+    "\n",
+    "```python\n",
+    "readings_path = 'path/to/your/data'\n",
+    "target_times = pd.read_csv('path/to/your/target_times.csv', parse_dates=['cutoff_time'])\n",
+    "```"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from greenguard.loaders import CSVLoader\n",
+    "\n",
+    "readings_path = 'readings'\n",
+    "\n",
+    "csv_loader = CSVLoader(readings_path)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Once we have created our instance, we can load the readings needed for our target times\n",
+    "by calling the `load` method with two arguments:\n",
+    "\n",
+    "* `target_times (pandas.DataFrame)`: the `target_times` table.\n",
+    "* `window_size (str)`: the size of the training window, as a timedelta specification\n",
+    "  (amount + time unit). This indicates the minimum amount of data that we need to\n",
+    "  load for each training example in the `target_times` table.\n",
+    "\n",
+    "For example, let's load the readings needed for all our `target_times`, using a\n",
+    "`window_size` of one day."
+ ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2020-02-10 19:03:18,638 - INFO - csv - Loaded 1298564 readings from turbine T001\n", + "2020-02-10 19:03:18,763 - INFO - csv - Loaded 1298564 turbine readings\n", + "2020-02-10 19:03:19,115 - INFO - targets - Dropped 2 invalid targets\n" + ] + } + ], + "source": [ + "target_times, readings = csv_loader.load(target_times, '1d')" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1298564, 4)" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "readings.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idsignal_idtimestampvalue
0T001S012013-01-12294.0
1T001S022013-01-12310.0
2T001S032013-01-12306.0
3T001S042013-01-12303.0
4T001S052013-01-12265.0
\n", + "
" + ], + "text/plain": [ + " turbine_id signal_id timestamp value\n", + "0 T001 S01 2013-01-12 294.0\n", + "1 T001 S02 2013-01-12 310.0\n", + "2 T001 S03 2013-01-12 306.0\n", + "3 T001 S04 2013-01-12 303.0\n", + "4 T001 S05 2013-01-12 265.0" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "readings.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "turbine_id object\n", + "signal_id object\n", + "timestamp datetime64[ns]\n", + "value float64\n", + "dtype: object" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "readings.dtypes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can see how the readings have been loaded with the expected format, including\n", + "the four expected columns:\n", + "\n", + "* `turbine_id`: Unique identifier of the turbine which this reading comes from.\n", + "* `signal_id`: Unique identifier of the signal which this reading comes from.\n", + "* `timestamp (datetime)`: Time where the reading took place, as a datetime.\n", + "* `value (float)`: Numeric value of this reading." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can also see how there is a message that indicates that there are 2 invalid targets\n", + "that have been dropped. This is because within our readings there was not enough\n", + "data to cover the entire trainin window for them, so they cannot be included in the\n", + "final problem specification." + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(351, 3)" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "target_times.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's see what happens if we increase the `window_size` to, for example, 30 days." + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2020-02-10 19:08:21,859 - INFO - csv - Loaded 1302308 readings from turbine T001\n", + "2020-02-10 19:08:21,955 - INFO - csv - Loaded 1302308 turbine readings\n", + "2020-02-10 19:08:22,298 - INFO - targets - Dropped 28 invalid targets\n" + ] + } + ], + "source": [ + "target_times, readings = csv_loader.load(target_times, '30d')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can see that now more targets needed to be dropped, because there was enough data\n", + "for them." + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(321, 3)" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "target_times.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "On the other side, we can see how now the size of the loaded readings table\n", + "is a bit bigger, as more data had to be included to properly cover all the\n", + "training windows." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1302308, 4)" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "readings.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Preprocessing the data\n", + "\n", + "In some cases, if the amount of targets is big enough, fitting high frequency data\n", + "into memory will still be a challenge.\n", + "\n", + "For this cases, the `CSVLoader` class also supports passing a resampling rule and\n", + "an aggregation function specification, so the data can go through a sampling\n", + "frequency reduction aggregation while it is loaded, reducing the amount of spaces\n", + "that it occupies in memory once loaded.\n", + "\n", + "In order to use the resampling feature, we will need to create a new instance\n", + "of the `CSVLoader` passing the following new arguments:\n", + "\n", + "* `rule (str)`: Time-delta specification (amount+unit) of the new sampling frequency.\n", + "* `aggregation (str or function)`: Aggregation to apply when resampling." + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "csv_loader = CSVLoader(readings_path, rule='4h', aggregation='mean')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And then call the `load` method normally." + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2020-02-10 19:31:50,932 - INFO - csv - Loaded 1235535 readings from turbine T001\n", + "2020-02-10 19:31:50,938 - INFO - csv - Resampling: 4h - mean\n", + "2020-02-10 19:31:51,459 - INFO - csv - Loaded 52130 turbine readings\n", + "2020-02-10 19:31:51,689 - INFO - targets - Dropped 2 invalid targets\n" + ] + } + ], + "source": [ + "target_times, readings = csv_loader.load(target_times, '14d')" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(52130, 4)" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "readings.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idsignal_idtimestampvalue
0T001S012013-01-27 00:00:00791.333333
1T001S012013-01-27 04:00:00746.750000
2T001S012013-01-27 08:00:00808.750000
3T001S012013-01-27 12:00:00760.125000
4T001S012013-01-27 16:00:00720.833333
\n", + "
" + ], + "text/plain": [ + " turbine_id signal_id timestamp value\n", + "0 T001 S01 2013-01-27 00:00:00 791.333333\n", + "1 T001 S01 2013-01-27 04:00:00 746.750000\n", + "2 T001 S01 2013-01-27 08:00:00 808.750000\n", + "3 T001 S01 2013-01-27 12:00:00 760.125000\n", + "4 T001 S01 2013-01-27 16:00:00 720.833333" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "readings.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(319, 3)" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "target_times.shape" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Unstacking\n", + "\n", + "Some of the pipelines included in **GreenGuard** expect a slightly different input format,\n", + "where the data has been unstacked by `signal_id`, putting the values of each signal in a\n", + "different column instead of having all of them in a single column.\n", + "\n", + "In such cases, the `CSVLoader` can also take care of the unstacking step.\n", + "\n", + "For this, all you need to do is add `unstack=True` argument when creating the instance\n", + "and then use the `load` method as usual." + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2020-02-10 19:36:03,403 - INFO - csv - Loaded 1228047 readings from turbine T001\n", + "2020-02-10 19:36:03,411 - INFO - csv - Resampling: 4h - mean\n", + "2020-02-10 19:36:03,881 - INFO - csv - Loaded 1993 turbine readings\n", + "2020-02-10 19:36:04,165 - INFO - targets - Dropped 2 invalid targets\n" + ] + } + ], + "source": [ + "csv_loader = CSVLoader(readings_path, rule='4h', aggregation='mean', unstack=True)\n", + "target_times, readings = csv_loader.load(target_times, '14d')" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1993, 28)" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "readings.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idtimestampvalue_S01value_S02value_S03value_S04value_S05value_S06value_S07value_S08...value_S17value_S18value_S19value_S20value_S21value_S22value_S23value_S24value_S25value_S26
0T0012013-01-28 00:00:00715.750000709.333333710.208333796.666667771.750000732.916667766.1666673.361627e+06...13.4875004.272212e+0649.04166749.04166749.04166749.04166749.04166749.04166749.041667336.000000
1T0012013-01-28 04:00:00779.416667777.500000779.666667824.125000800.083333765.291667791.9583333.362652e+06...14.6958334.279238e+0643.87500043.87500043.87500043.87500043.91666743.87500043.916667301.083333
2T0012013-01-28 08:00:00732.583333757.375000738.125000794.583333765.291667736.541667766.9166673.364190e+06...14.1000004.289814e+0681.66666782.37500082.41666782.87500082.54166783.25000081.416667564.041667
3T0012013-01-28 12:00:00743.833333779.083333775.833333804.208333771.458333736.166667761.0000003.366258e+06...13.6916674.304198e+0688.25000090.83333390.87500091.50000090.16666790.87500088.916667616.833333
4T0012013-01-28 16:00:00640.416667678.000000675.958333709.166667675.833333670.666667682.1666673.368310e+06...12.4541674.318658e+0680.45833383.54166785.33333385.91666783.50000086.37500083.333333574.958333
\n", + "

5 rows × 28 columns

\n", + "
" + ], + "text/plain": [ + " turbine_id timestamp value_S01 value_S02 value_S03 \\\n", + "0 T001 2013-01-28 00:00:00 715.750000 709.333333 710.208333 \n", + "1 T001 2013-01-28 04:00:00 779.416667 777.500000 779.666667 \n", + "2 T001 2013-01-28 08:00:00 732.583333 757.375000 738.125000 \n", + "3 T001 2013-01-28 12:00:00 743.833333 779.083333 775.833333 \n", + "4 T001 2013-01-28 16:00:00 640.416667 678.000000 675.958333 \n", + "\n", + " value_S04 value_S05 value_S06 value_S07 value_S08 ... \\\n", + "0 796.666667 771.750000 732.916667 766.166667 3.361627e+06 ... \n", + "1 824.125000 800.083333 765.291667 791.958333 3.362652e+06 ... \n", + "2 794.583333 765.291667 736.541667 766.916667 3.364190e+06 ... \n", + "3 804.208333 771.458333 736.166667 761.000000 3.366258e+06 ... \n", + "4 709.166667 675.833333 670.666667 682.166667 3.368310e+06 ... \n", + "\n", + " value_S17 value_S18 value_S19 value_S20 value_S21 value_S22 \\\n", + "0 13.487500 4.272212e+06 49.041667 49.041667 49.041667 49.041667 \n", + "1 14.695833 4.279238e+06 43.875000 43.875000 43.875000 43.875000 \n", + "2 14.100000 4.289814e+06 81.666667 82.375000 82.416667 82.875000 \n", + "3 13.691667 4.304198e+06 88.250000 90.833333 90.875000 91.500000 \n", + "4 12.454167 4.318658e+06 80.458333 83.541667 85.333333 85.916667 \n", + "\n", + " value_S23 value_S24 value_S25 value_S26 \n", + "0 49.041667 49.041667 49.041667 336.000000 \n", + "1 43.916667 43.875000 43.916667 301.083333 \n", + "2 82.541667 83.250000 81.416667 564.041667 \n", + "3 90.166667 90.875000 88.916667 616.833333 \n", + "4 83.500000 86.375000 83.333333 574.958333 \n", + "\n", + "[5 rows x 28 columns]" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "readings.head()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/notebooks/CSVLoader Demo.ipynb b/notebooks/CSVLoader Demo.ipynb deleted file mode 100644 index 4710596..0000000 --- a/notebooks/CSVLoader Demo.ipynb +++ /dev/null @@ -1,836 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# CSVLoader Demo\n", - "\n", - "This notebook shows how to use the CSVLoader class to load readings from a folder\n", - "containing readings in the raw format.\n", - "\n", - "Details about the raw readings format can be found in the documentation site.\n", - "\n", - "In this notebook we will:\n", - "\n", - "- Generate a folder with readings in the raw format based on the demo data\n", - "- Load the redings needed for our target times\n", - "- Explore different options from the CSVLoader\n", - "- Load a pipeline and use it on the loaded data\n", - "- Load the readings in the unstacked format\n", - "- Load an unstacked pipeline and use it on the loaded data" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 0. Setup the logging\n", - "\n", - "This step sets up logging in our environment to increase our visibility over\n", - "the steps that GreenGuard performs." 
- ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "import logging;\n", - "\n", - "logging.basicConfig(level=logging.INFO)\n", - "logging.getLogger().setLevel(level=logging.INFO)\n", - "\n", - "import warnings\n", - "warnings.simplefilter(\"ignore\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. Generate Raw Readings\n", - "\n", - "The first step will be to execute the `generate_raw_readings` function, which will create a\n", - "folder in the indicated path and populate it with the raw version of the demo readings." - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2020-02-03 14:26:17,008 - INFO - demo - Generating file raw_readings/T001/2013-01-.csv\n", - "2020-02-03 14:26:17,518 - INFO - demo - Generating file raw_readings/T001/2013-02-.csv\n", - "2020-02-03 14:26:18,045 - INFO - demo - Generating file raw_readings/T001/2013-03-.csv\n", - "2020-02-03 14:26:18,580 - INFO - demo - Generating file raw_readings/T001/2013-04-.csv\n", - "2020-02-03 14:26:19,118 - INFO - demo - Generating file raw_readings/T001/2013-05-.csv\n", - "2020-02-03 14:26:19,668 - INFO - demo - Generating file raw_readings/T001/2013-06-.csv\n", - "2020-02-03 14:26:20,219 - INFO - demo - Generating file raw_readings/T001/2013-07-.csv\n", - "2020-02-03 14:26:20,753 - INFO - demo - Generating file raw_readings/T001/2013-08-.csv\n", - "2020-02-03 14:26:21,304 - INFO - demo - Generating file raw_readings/T001/2013-09-.csv\n", - "2020-02-03 14:26:21,852 - INFO - demo - Generating file raw_readings/T001/2013-10-.csv\n", - "2020-02-03 14:26:22,388 - INFO - demo - Generating file raw_readings/T001/2013-11-.csv\n", - "2020-02-03 14:26:22,931 - INFO - demo - Generating file raw_readings/T001/2013-12-.csv\n" - ] - } - ], - "source": [ - "from greenguard.demo import generate_raw_readings\n", - "\n", - "readings_path = 'raw_readings'\n", - "\n", - "target_times = generate_raw_readings(readings_path)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This function will generate a set of reading files in the raw format.\n", - "\n", - "We will load one of them to explore it:" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "readings_sample = pd.read_csv('raw_readings/T001/2013-01-.csv')" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
turbine_idsignal_idtimestampvalue
0T001S0101/10/13 00:00:00323.0
1T001S0201/10/13 00:00:00320.0
2T001S0301/10/13 00:00:00284.0
3T001S0401/10/13 00:00:00348.0
4T001S0501/10/13 00:00:00273.0
\n", - "
" - ], - "text/plain": [ - " turbine_id signal_id timestamp value\n", - "0 T001 S01 01/10/13 00:00:00 323.0\n", - "1 T001 S02 01/10/13 00:00:00 320.0\n", - "2 T001 S03 01/10/13 00:00:00 284.0\n", - "3 T001 S04 01/10/13 00:00:00 348.0\n", - "4 T001 S05 01/10/13 00:00:00 273.0" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "readings_sample.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(353, 3)" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "target_times.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
turbine_idcutoff_timetarget
0T0012013-01-120
1T0012013-01-130
2T0012013-01-140
3T0012013-01-151
4T0012013-01-160
\n", - "
" - ], - "text/plain": [ - " turbine_id cutoff_time target\n", - "0 T001 2013-01-12 0\n", - "1 T001 2013-01-13 0\n", - "2 T001 2013-01-14 0\n", - "3 T001 2013-01-15 1\n", - "4 T001 2013-01-16 0" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "target_times.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.3002832861189802" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "target_times.target.mean()" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "turbine_id object\n", - "cutoff_time datetime64[ns]\n", - "target int64\n", - "dtype: object" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "target_times.dtypes" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "from greenguard.loaders import CSVLoader" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2020-02-03 12:50:11,263 - INFO - csv - Loaded 1306052 readings from turbine T001\n", - "2020-02-03 12:50:11,275 - INFO - csv - Loaded 1306052 turbine readings\n", - "2020-02-03 12:50:11,500 - INFO - targets - Dropped 2 invalid targets\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "\n", - "csv_loader = CSVLoader('raw_demo')\n", - "target_times, readings = csv_loader.load(target_times, '1d')" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(1306052, 4)" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "readings.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
turbine_idsignal_idtimestampvalue
0T001S012013-01-11209.0
1T001S022013-01-11193.0
2T001S032013-01-11177.0
3T001S042013-01-11188.0
4T001S052013-01-11150.0
\n", - "
" - ], - "text/plain": [ - " turbine_id signal_id timestamp value\n", - "0 T001 S01 2013-01-11 209.0\n", - "1 T001 S02 2013-01-11 193.0\n", - "2 T001 S03 2013-01-11 177.0\n", - "3 T001 S04 2013-01-11 188.0\n", - "4 T001 S05 2013-01-11 150.0" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "readings.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "turbine_id object\n", - "signal_id object\n", - "timestamp datetime64[ns]\n", - "value float64\n", - "dtype: object" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "readings.dtypes" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(351, 3)" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "target_times.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "2020-02-03 12:50:31,423 - INFO - csv - Loaded 1306052 readings from turbine T001\n", - "2020-02-03 12:50:31,427 - INFO - csv - Resampling: 4h - mean\n", - "2020-02-03 12:50:31,689 - INFO - csv - Loaded 2119 turbine readings\n", - "2020-02-03 12:50:31,843 - INFO - targets - Dropped 14 invalid targets\n" - ] - } - ], - "source": [ - "csv_loader = CSVLoader('raw_demo', rule='4h', aggregation='mean', unstack=True)\n", - "target_times, readings = csv_loader.load(target_times, '15d')" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(2119, 28)" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "readings.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
turbine_idtimestampvalue_S01value_S02value_S03value_S04value_S05value_S06value_S07value_S08...value_S17value_S18value_S19value_S20value_S21value_S22value_S23value_S24value_S25value_S26
0T0012013-01-10 00:00:00253.041667268.250000268.041667297.166667234.666667261.916667206.7916673.198335e+06...9.0791673.134510e+0642.41666744.95833344.83333349.62500039.20833343.83333334.625293.166667
1T0012013-01-10 04:00:00572.083333555.291667538.666667592.291667557.166667534.000000544.2500003.199514e+06...10.8375003.142505e+0662.08333362.50000063.62500063.54166761.33333362.54166754.000421.208333
2T0012013-01-10 08:00:00688.791667696.791667706.625000750.791667714.250000683.333333658.1666673.201449e+06...12.7541673.155809e+0692.20833394.95833394.66666797.33333394.12500093.58333386.375638.291667
3T0012013-01-10 12:00:00396.333333418.500000415.791667438.541667382.250000364.666667320.3333333.203319e+06...10.9166673.168640e+0655.75000060.08333358.58333361.29166752.79166752.79166744.000376.125000
4T0012013-01-10 16:00:00390.458333408.875000409.500000458.000000415.583333363.000000364.4583333.204504e+06...10.4125003.176672e+0649.95833353.87500054.45833356.75000052.70833346.70833347.625354.750000
\n", - "

5 rows × 28 columns

\n", - "
" - ], - "text/plain": [ - " turbine_id timestamp value_S01 value_S02 value_S03 \\\n", - "0 T001 2013-01-10 00:00:00 253.041667 268.250000 268.041667 \n", - "1 T001 2013-01-10 04:00:00 572.083333 555.291667 538.666667 \n", - "2 T001 2013-01-10 08:00:00 688.791667 696.791667 706.625000 \n", - "3 T001 2013-01-10 12:00:00 396.333333 418.500000 415.791667 \n", - "4 T001 2013-01-10 16:00:00 390.458333 408.875000 409.500000 \n", - "\n", - " value_S04 value_S05 value_S06 value_S07 value_S08 ... \\\n", - "0 297.166667 234.666667 261.916667 206.791667 3.198335e+06 ... \n", - "1 592.291667 557.166667 534.000000 544.250000 3.199514e+06 ... \n", - "2 750.791667 714.250000 683.333333 658.166667 3.201449e+06 ... \n", - "3 438.541667 382.250000 364.666667 320.333333 3.203319e+06 ... \n", - "4 458.000000 415.583333 363.000000 364.458333 3.204504e+06 ... \n", - "\n", - " value_S17 value_S18 value_S19 value_S20 value_S21 value_S22 \\\n", - "0 9.079167 3.134510e+06 42.416667 44.958333 44.833333 49.625000 \n", - "1 10.837500 3.142505e+06 62.083333 62.500000 63.625000 63.541667 \n", - "2 12.754167 3.155809e+06 92.208333 94.958333 94.666667 97.333333 \n", - "3 10.916667 3.168640e+06 55.750000 60.083333 58.583333 61.291667 \n", - "4 10.412500 3.176672e+06 49.958333 53.875000 54.458333 56.750000 \n", - "\n", - " value_S23 value_S24 value_S25 value_S26 \n", - "0 39.208333 43.833333 34.625 293.166667 \n", - "1 61.333333 62.541667 54.000 421.208333 \n", - "2 94.125000 93.583333 86.375 638.291667 \n", - "3 52.791667 52.791667 44.000 376.125000 \n", - "4 52.708333 46.708333 47.625 354.750000 \n", - "\n", - "[5 rows x 28 columns]" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "readings.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(337, 3)" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "target_times.shape" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.9" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} From aa82ca9654da12753657d176bc6d8e5bd619a8e8 Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Mon, 10 Feb 2020 22:47:23 -0500 Subject: [PATCH 011/171] Restrict sphinx version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ab0d545..ff6afe9 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ # docs 'm2r>=0.2.0', - 'Sphinx>=1.7.1', + 'Sphinx>=1.7.1,<2.4', 'sphinx_rtd_theme>=0.2.4', 'autodocsumm>=0.1.10', From 2f375a48c6b2aa70b339cb99594c18e970cbf169 Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Tue, 11 Feb 2020 10:56:54 -0500 Subject: [PATCH 012/171] Prepare for v0.2.0 --- HISTORY.md | 8 ++++++++ greenguard/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 0bd0426..dc27a8f 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,13 @@ # History +## 0.2.0 - 2020-02-11 + +first stable release: + +* efficient data loading and preprocessing +* initial collection of dfs and lstm based pipelines +* optimized pipeline tuning + ## 0.1.0 * 
First release on PyPI diff --git a/greenguard/__init__.py b/greenguard/__init__.py index 1d98a94..c60bdb4 100644 --- a/greenguard/__init__.py +++ b/greenguard/__init__.py @@ -4,7 +4,7 @@ __author__ = """MIT Data To AI Lab""" __email__ = 'dailabmit@gmail.com' -__version__ = '0.1.1-dev' +__version__ = '0.2.0.dev0' import os diff --git a/setup.cfg b/setup.cfg index 02c12ea..614c66a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.1.1.dev0 +current_version = 0.2.0.dev0 commit = True tag = True parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? diff --git a/setup.py b/setup.py index ff6afe9..8a09fa9 100644 --- a/setup.py +++ b/setup.py @@ -103,6 +103,6 @@ test_suite='tests', tests_require=tests_require, url='https://github.com/D3-AI/GreenGuard', - version='0.1.1.dev0', + version='0.2.0.dev0', zip_safe=False, ) From 5de0680cbf4fc059bd4fa9fea49ac7200280edb0 Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Tue, 11 Feb 2020 10:57:16 -0500 Subject: [PATCH 013/171] =?UTF-8?q?Bump=20version:=200.2.0.dev0=20?= =?UTF-8?q?=E2=86=92=200.2.0.dev1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- greenguard/__init__.py | 2 +- setup.cfg | 9 +++++---- setup.py | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/greenguard/__init__.py b/greenguard/__init__.py index c60bdb4..f677be1 100644 --- a/greenguard/__init__.py +++ b/greenguard/__init__.py @@ -4,7 +4,7 @@ __author__ = """MIT Data To AI Lab""" __email__ = 'dailabmit@gmail.com' -__version__ = '0.2.0.dev0' +__version__ = '0.2.0.dev1' import os diff --git a/setup.cfg b/setup.cfg index 614c66a..ba0d3ac 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,16 +1,16 @@ [bumpversion] -current_version = 0.2.0.dev0 +current_version = 0.2.0.dev1 commit = True tag = True parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? -serialize = +serialize = {major}.{minor}.{patch}.{release}{candidate} {major}.{minor}.{patch} [bumpversion:part:release] optional_value = release first_value = dev -values = +values = dev release @@ -34,7 +34,7 @@ ignore = # keep empty to prevent default ignores [isort] include_trailing_comment = True -line_length=99 +line_length = 99 lines_between_types = 0 multi_line_output = 4 not_skip = __init__.py @@ -45,3 +45,4 @@ test = pytest [tool:pytest] collect_ignore = ['setup.py'] + diff --git a/setup.py b/setup.py index 8a09fa9..bc4f06b 100644 --- a/setup.py +++ b/setup.py @@ -103,6 +103,6 @@ test_suite='tests', tests_require=tests_require, url='https://github.com/D3-AI/GreenGuard', - version='0.2.0.dev0', + version='0.2.0.dev1', zip_safe=False, ) From 8cbf6f5101252a022abcd8bdb08b83d98513c456 Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Tue, 11 Feb 2020 11:16:39 -0500 Subject: [PATCH 014/171] Make accuracy the default metric --- README.md | 20 +++++++++++--------- greenguard/pipeline.py | 2 +- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 361b3bb..94de7e7 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,9 @@ AutoML for Renewable Energy Industries. [![PyPI Shield](https://img.shields.io/pypi/v/greenguard.svg)](https://pypi.python.org/pypi/greenguard) [![Travis CI Shield](https://travis-ci.org/D3-AI/GreenGuard.svg?branch=master)](https://travis-ci.org/D3-AI/GreenGuard) [![Downloads](https://pepy.tech/badge/greenguard)](https://pepy.tech/project/greenguard) + # GreenGuard @@ -138,7 +140,7 @@ The first step is to load the demo data. 
For this, we will import and call the `greenguard.demo.load_demo` function without any arguments: -```python +```python3 from greenguard.demo import load_demo target_times, readings = load_demo() @@ -175,7 +177,7 @@ In this case, we will split them using the [train_test_split function from sciki https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.train_test_split.html), but it can be done with any other suitable tool. -```python +```python3 from sklearn.model_selection import train_test_split train, test = train_test_split(target_times, test_size=0.25, random_state=0) @@ -189,7 +191,7 @@ the `train` and `test` inputs. Additionally, if we want to calculate a goodness-of-fit score later on, we can separate the testing target values from the `test` table by popping them from it: -```python +```python3 test_targets = test.pop('target') ``` @@ -200,7 +202,7 @@ Once we have the data ready, we need to find a suitable pipeline. The list of available GreenGuard Pipelines can be obtained using the `greenguard.get_pipelines` function. -```python +```python3 from greenguard import get_pipelines pipelines = get_pipelines() @@ -222,7 +224,7 @@ available in the GreenGuard system: For the rest of this tutorial, we will select and use the pipeline `resample_600s_unstack_normalize_dfs_1d_xgb_classifier` as our template. -```python +```python3 pipeline_name = 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier' ``` @@ -234,7 +236,7 @@ fit it. For this, we will create an instance of a `GreenGuardPipeline` object passing the name of the pipeline that we want to use: -```python +```python3 from greenguard.pipeline import GreenGuardPipeline pipeline = GreenGuardPipeline(pipeline_name) @@ -243,7 +245,7 @@ pipeline = GreenGuardPipeline(pipeline_name) And then we can directly fit it to our data by calling its `fit` method and passing in the training `target_times` and the complete `readings` table: -```python +```python3 pipeline.fit(train, readings) ``` @@ -253,7 +255,7 @@ After fitting the pipeline, we are ready to make predictions on new data by call `pipeline.predict` method passing the testing `target_times` and, again, the complete `readings` table. -```python +```python3 predictions = pipeline.predict(test, readings) ``` @@ -262,7 +264,7 @@ predictions = pipeline.predict(test, readings) Finally, after making predictions we can evaluate how good the prediction was using any suitable metric. 
-```python +```python3 from sklearn.metrics import f1_score f1_score(test_targets, predictions) diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py index 7437a4a..6ac75af 100644 --- a/greenguard/pipeline.py +++ b/greenguard/pipeline.py @@ -184,7 +184,7 @@ def set_init_params(self, init_params): self._update_params(template_params, init_params) self._build_pipeline() - def __init__(self, template, metric, cost=False, init_params=None, stratify=True, + def __init__(self, template, metric='accuracy', cost=False, init_params=None, stratify=True, cv_splits=5, shuffle=True, random_state=0, preprocessing=0): self._cv = self._get_cv(stratify, cv_splits, shuffle, random_state) From e756fc519562f4816eaa3e42a167861fbc759b15 Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Tue, 11 Feb 2020 11:28:50 -0500 Subject: [PATCH 015/171] Use rundoc to run the readme snippets --- Makefile | 4 ++++ setup.py | 1 + tox.ini | 10 ++++++++-- 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 78fef0f..d4126ed 100644 --- a/Makefile +++ b/Makefile @@ -106,6 +106,10 @@ fix-lint: ## fix lint issues using autoflake, autopep8, and isort test: ## run tests quickly with the default Python python -m pytest --basetemp=${ENVTMPDIR} --cov=greenguard +.PHONY: test-readme +test-readme: ## run the readme snippets + rundoc run --single-session python3 -t python3 README.md + .PHONY: test-all test-all: ## run tests on every Python version with tox tox -r diff --git a/setup.py b/setup.py index bc4f06b..611f50c 100644 --- a/setup.py +++ b/setup.py @@ -32,6 +32,7 @@ tests_require = [ 'pytest>=3.4.2', 'pytest-cov>=2.6.0', + 'rundoc>=0.4.3' ] development_requires = [ diff --git a/tox.ini b/tox.ini index f59f77b..de5cd07 100644 --- a/tox.ini +++ b/tox.ini @@ -1,11 +1,11 @@ [tox] -envlist = py35, py36, py37, lint, docs +envlist = py35, py36, py37, lint, docs, readme [travis] python = 3.7: py37 - 3.6: py36, docs, lint + 3.6: py36, docs, lint, readme 3.5: py35 @@ -30,3 +30,9 @@ skipsdist = true extras = dev commands = /usr/bin/env make docs + + +[testenv:readme] +skipsdist = true +commands = + /usr/bin/env make test-readme From ddad484f9423fe5b9e63dd93c5368b3382f7baef Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Tue, 11 Feb 2020 13:28:09 -0500 Subject: [PATCH 016/171] Move all travis to xenial --- .travis.yml | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/.travis.yml b/.travis.yml index ef8e31a..9cbca5a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,16 +1,11 @@ # Config file for automatic testing at travis-ci.org -dist: trusty +dist: xenial language: python python: + - 3.7 - 3.6 - 3.5 -matrix: - include: - - python: 3.7 - dist: xenial - sudo: required - # Command to install dependencies install: pip install -U tox-travis codecov From b0aba1a02fa25c398cfb77bc2b34c432c7251a86 Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Wed, 12 Feb 2020 19:20:54 -0500 Subject: [PATCH 017/171] Add docker-compose --- .dockerignore | 1 + Dockerfile | 16 ++++++++++++++++ Makefile | 8 ++++---- docker-compose.yml | 9 +++++++++ docker/greenguard-jupyter.Dockerfile | 16 ---------------- 5 files changed, 30 insertions(+), 20 deletions(-) create mode 100644 Dockerfile create mode 100644 docker-compose.yml delete mode 100644 docker/greenguard-jupyter.Dockerfile diff --git a/.dockerignore b/.dockerignore index d8e7acb..7ea8e51 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1 +1,2 @@ notebooks-private/ +.tox/ diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..f1f953d 
--- /dev/null +++ b/Dockerfile @@ -0,0 +1,16 @@ +FROM python:3.6 + +EXPOSE 8888 + +RUN adduser jupyter --uid 1000 --disabled-password --system + +RUN mkdir /greenguard +COPY setup.py /greenguard +RUN pip install -e /greenguard && pip install jupyter + +COPY greenguard /greenguard/greenguard +COPY notebooks /greenguard/notebooks + +WORKDIR /greenguard +USER jupyter +CMD /usr/local/bin/jupyter notebook --ip 0.0.0.0 --NotebookApp.token='' diff --git a/Makefile b/Makefile index d4126ed..7683786 100644 --- a/Makefile +++ b/Makefile @@ -218,8 +218,8 @@ docker-jupyter-clean: ## Remove the greenguard-jupyter docker image docker rmi -f greenguard-jupyter .PHONY: docker-jupyter-build -docker-jupyter-build: docker-jupyter-clean ## Build the greenguard-jupyter docker image using repo2docker - docker build -f docker/greenguard-jupyter.Dockerfile -t greenguard-jupyter . +docker-jupyter-build: ## Build the greenguard-jupyter docker image using repo2docker + docker build -t greenguard-jupyter . .PHONY: docker-jupyter-save docker-jupyter-save: docker-jupyter-build ## Build the greenguard-jupyter image and save it as greenguard-jupyter.tar @@ -231,11 +231,11 @@ docker-jupyter-load: ## Load the greenguard-jupyter image from greenguard-jupyte .PHONY: docker-jupyter-run docker-jupyter-run: ## Run the greenguard-jupyter image in editable mode - docker run --rm -v $(shell pwd):/app -ti -p8888:8888 --name greenguard-jupyter greenguard-jupyter + docker run --rm -v $(shell pwd):/greenguard -ti -p8888:8888 --name greenguard-jupyter greenguard-jupyter .PHONY: docker-jupyter-start docker-jupyter-start: ## Start the greenguard-jupyter image as a daemon - docker run --rm -d -v $(shell pwd):/app -ti -p8888:8888 --name greenguard-jupyter greenguard-jupyter + docker run --rm -d -v $(shell pwd):/greenguard -ti -p8888:8888 --name greenguard-jupyter greenguard-jupyter .PHONY: docker-jupyter-stop docker-jupyter-stop: ## Stop the greenguard-jupyter daemon diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..5c549c6 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,9 @@ +version: '3' +services: + greenguard: + build: + context: . 
+    ports:
+      - "8888:8888"
+    volumes:
+      - .:/greenguard
diff --git a/docker/greenguard-jupyter.Dockerfile b/docker/greenguard-jupyter.Dockerfile
deleted file mode 100644
index 947d76b..0000000
--- a/docker/greenguard-jupyter.Dockerfile
+++ /dev/null
@@ -1,16 +0,0 @@
-FROM python:3.6
-
-EXPOSE 8888
-
-RUN mkdir /app
-COPY setup.py /app
-RUN pip install -e /app && pip install jupyter
-
-COPY greenguard /app/greenguard
-COPY notebooks /app/notebooks
-
-RUN adduser jupyter --uid 1000 --disabled-password --system
-
-WORKDIR /app
-USER jupyter
-CMD /usr/local/bin/jupyter notebook --ip 0.0.0.0 --NotebookApp.token=''

From beacd76769f861fd49204a562acf919ec1dbd83a Mon Sep 17 00:00:00 2001
From: Carles Sala
Date: Thu, 13 Feb 2020 00:33:15 -0500
Subject: [PATCH 018/171] Update usage instructions

---
 Dockerfile         | 15 ++++++++-------
 README.md          | 29 +++++++++++++++++++++++++++--
 docker-compose.yml |  6 ++++--
 3 files changed, 39 insertions(+), 11 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index f1f953d..bd6411a 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,16 +1,17 @@
 FROM python:3.6
+ARG UID=1000
 
 EXPOSE 8888
 
-RUN adduser jupyter --uid 1000 --disabled-password --system
+RUN adduser jupyter --uid $UID --disabled-password --system
 
-RUN mkdir /greenguard
-COPY setup.py /greenguard
-RUN pip install -e /greenguard && pip install jupyter
+RUN mkdir /app
+COPY setup.py /app
+RUN pip install -e /app && pip install jupyter
 
-COPY greenguard /greenguard/greenguard
-COPY notebooks /greenguard/notebooks
+COPY greenguard /app/greenguard
+COPY notebooks /app/notebooks
 
-WORKDIR /greenguard
+WORKDIR /app
 USER jupyter
 CMD /usr/local/bin/jupyter notebook --ip 0.0.0.0 --NotebookApp.token=''
diff --git a/README.md b/README.md
index 94de7e7..aecf61c 100644
--- a/README.md
+++ b/README.md
@@ -43,7 +43,9 @@ The salient aspects of this customized project are:
 * A robust continuous integration and testing infrastructure.
 * A ``learning database`` recording all past outcomes --> tasks, pipelines, outcomes.
 
-# Requirements
+# Install
+
+## Requirements
 
 **GreenGuard** has been developed and runs on Python 3.6 and 3.7.
 
@@ -51,7 +53,7 @@ Also, although it is not strictly required, the usage of a [virtualenv](
 https://virtualenv.pypa.io/en/latest/) is highly recommended in order to avoid interfering
 with other software installed in the system where you are trying to run **GreenGuard**.
 
-# Install
+## Download and Install
 
 **GreenGuard** can be installed locally using [pip](https://pip.pypa.io/en/stable/) with
 the following command:
@@ -65,6 +67,29 @@ This will pull and install the latest stable release from [PyPi](https://pypi.or
 If you want to install from source or contribute to the project please read the
 [Contributing Guide](https://d3-ai.github.io/GreenGuard/contributing.html#get-started).
 
+## Docker usage
+
+Alternatively, **GreenGuard** is prepared to be run inside a docker environment using
+`docker-compose`.
+
+For this, make sure to have both [docker](https://docs.docker.com/install/) and [docker-compose](
+https://docs.docker.com/compose/install/) installed on your system and then follow these steps:
+
+1. Clone this repository and go into the `GreenGuard` folder:
+
+```bash
+git clone git@github.com:D3-AI/GreenGuard.git
+cd GreenGuard
+```
+
+2. Start a Jupyter Notebook inside a docker container.
+
+```bash
+docker-compose up --build
+```
+
+3. 
Point your browser at http://127.0.0.1:8888

 # Data Format
 
 The minimum input expected by the **GreenGuard** system consists of the following two elements,
diff --git a/docker-compose.yml b/docker-compose.yml
index 5c549c6..dfb7aed 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,9 +1,11 @@
 version: '3'
 services:
-  greenguard:
+  jupyter:
     build:
       context: .
+      args:
+        - UID=${UID:-1000}
     ports:
       - "8888:8888"
     volumes:
-      - .:/greenguard
+      - .:/app

From 5cd3fe89f71c3214bae085db1c2fb39e58b83a23 Mon Sep 17 00:00:00 2001
From: Carles Sala
Date: Fri, 14 Feb 2020 09:52:48 -0500
Subject: [PATCH 019/171] Fix some typos and add save/load example

---
 notebooks/2. Extract Readings.ipynb | 389 ++++++++++++++++++----------
 1 file changed, 255 insertions(+), 134 deletions(-)

diff --git a/notebooks/2. Extract Readings.ipynb b/notebooks/2. Extract Readings.ipynb
index 14b4cab..8379817 100644
--- a/notebooks/2. Extract Readings.ipynb
+++ b/notebooks/2. Extract Readings.ipynb
@@ -6,19 +6,20 @@
    "source": [
     "# Extract Readings\n",
     "\n",
-    "This notebook shows how to use the CSVLoader class to load readings from a folder\n",
-    "containing readings in the raw format.\n",
+    "This notebook shows how to use the CSVLoader class to load the readings table from a folder\n",
+    "that contains readings in the raw CSV format.\n",
     "\n",
-    "Details about the raw readings format can be found in the documentation site.\n",
+    "The Raw CSV format is briefly explained below, but more details can be found in [the documentation site](\n",
+    "https://d3-ai.github.io/GreenGuard/advanced_usage/csv.html)\n",
     "\n",
     "In this notebook we will:\n",
     "\n",
     "- Generate a folder with readings in the raw format based on the demo data\n",
+    "- Explore the raw format\n",
     "- Load the readings needed for our target times\n",
     "- Explore different options from the CSVLoader\n",
-    "- Load a pipeline and use it on the loaded data\n",
     "- Load the readings in the unstacked format\n",
-    "- Load an unstacked pipeline and use it on the loaded data"
+    "- Store the readings and target times using pickle"
   ]
  },
@@ -69,18 +70,18 @@
    "name": "stderr",
    "output_type": "stream",
    "text": [
-     "2020-02-10 18:41:33,310 - INFO - demo - Generating file readings/T001/2013-01-.csv\n",
-     "2020-02-10 18:41:34,048 - INFO - demo - Generating file readings/T001/2013-02-.csv\n",
-     "2020-02-10 18:41:34,845 - INFO - demo - Generating file readings/T001/2013-03-.csv\n",
-     "2020-02-10 18:41:35,670 - INFO - demo - Generating file readings/T001/2013-04-.csv\n",
-     "2020-02-10 18:41:36,476 - INFO - demo - Generating file readings/T001/2013-05-.csv\n",
-     "2020-02-10 18:41:37,259 - INFO - demo - Generating file readings/T001/2013-06-.csv\n",
-     "2020-02-10 18:41:38,194 - INFO - demo - Generating file readings/T001/2013-07-.csv\n",
-     "2020-02-10 18:41:39,031 - INFO - demo - Generating file readings/T001/2013-08-.csv\n",
-     "2020-02-10 18:41:39,891 - INFO - demo - Generating file readings/T001/2013-09-.csv\n",
-     "2020-02-10 18:41:40,689 - INFO - demo - Generating file readings/T001/2013-10-.csv\n",
-     "2020-02-10 18:41:41,478 - INFO - demo - Generating file readings/T001/2013-11-.csv\n",
-     "2020-02-10 18:41:42,249 - INFO - demo - Generating file readings/T001/2013-12-.csv\n"
+     "2020-02-14 09:42:07,018 - INFO - demo - Generating file readings/T001/2013-01-.csv\n",
+     "2020-02-14 09:42:07,574 - INFO - demo - Generating file readings/T001/2013-02-.csv\n",
+     "2020-02-14 09:42:08,123 - INFO - demo - Generating file readings/T001/2013-03-.csv\n",
+     "2020-02-14 09:42:08,668 - INFO - 
demo - Generating file readings/T001/2013-04-.csv\n", + "2020-02-14 09:42:09,231 - INFO - demo - Generating file readings/T001/2013-05-.csv\n", + "2020-02-14 09:42:09,782 - INFO - demo - Generating file readings/T001/2013-06-.csv\n", + "2020-02-14 09:42:10,342 - INFO - demo - Generating file readings/T001/2013-07-.csv\n", + "2020-02-14 09:42:10,929 - INFO - demo - Generating file readings/T001/2013-08-.csv\n", + "2020-02-14 09:42:11,468 - INFO - demo - Generating file readings/T001/2013-09-.csv\n", + "2020-02-14 09:42:12,023 - INFO - demo - Generating file readings/T001/2013-10-.csv\n", + "2020-02-14 09:42:12,571 - INFO - demo - Generating file readings/T001/2013-11-.csv\n", + "2020-02-14 09:42:13,127 - INFO - demo - Generating file readings/T001/2013-12-.csv\n" ] } ], @@ -94,16 +95,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This function will generate a set of reading files in the raw format.\n", + "### Readings Format\n", "\n", - "We will load one of them to explore it:" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Readings Format" + "Here we will load one of the generated CSV files to briefly explore its contents." ] }, { @@ -205,10 +199,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Here we can cleary see the format in which the data is stored:\n", + "We can cleary see the format in which the data is stored:\n", "\n", "* All the data from all the turbines is inside a single folder.\n", - "* Inside this folder, one folder exists for each turbine, named exactly like the turbine:\n", + "* Inside this folder, another folder exists for each turbine, named exactly like the turbine:\n", " * `readings/T001`\n", " * `readings/T002`\n", " * ...\n", @@ -235,16 +229,16 @@ "metadata": {}, "source": [ "The previous function will have also returned us a `target_times` variable,\n", - "which is a `pandas.DataFrame` with the three expected columns:\n", + "which is a `pandas.DataFrame` containing the training examples, with the three expected columns:\n", "\n", - "* `turbine_id`\n", - "* `cutoff_time`\n", - "* `target`" + "* `turbine_id`: Id of the turbine associated with each training example\n", + "* `cutoff_time`: Time at which the prediction is being made\n", + "* `target`: Value that needs to be predicted" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -253,7 +247,7 @@ "(353, 3)" ] }, - "execution_count": 7, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -264,7 +258,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -337,7 +331,7 @@ "4 T001 2013-01-16 0" ] }, - "execution_count": 8, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -348,7 +342,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -357,7 +351,7 @@ "0.3002832861189802" ] }, - "execution_count": 9, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -368,7 +362,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -380,7 +374,7 @@ "dtype: object" ] }, - "execution_count": 10, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -398,20 +392,20 @@ "The readings in raw format can arbitrarily big, which might make it impossible to load\n", "them into memory all at once.\n", "\n", - "In order to load them in an efficient way that 
allows us to solve Machine Learning problems\n",
-    "using them, GreenGuard provides the `greenguard.loaders.CSVLoader` class.\n",
+    "In order to load them in an efficient way so that we can use them to solve Machine Learning\n",
+    "problems, GreenGuard provides the `greenguard.loaders.CSVLoader` class.\n",
     "\n",
     "This class is prepared to, given a target times table, explore a collection of raw readings\n",
-    "and extract only the information needed to solve the corresponding problem.\n",
+    "and extract only the information needed to solve that particular problem.\n",
     "\n",
-    "The first step in order to use it, is to create an instance passing it the path\n",
+    "The first step in order to use it is to create an instance passing it the path\n",
     "to where the reading files are stored.\n",
     "\n",
     "**NOTE**: If you want to use your own dataset instead of the demo version,\n",
     "all you have to do is make the `readings_path` variable point at the\n",
     "folder where you have your CSV files stored and load your `target_times` table:\n",
     "\n",
-    "Make sure to parse the `cutoff_time` column!\n",
+    "Make sure to parse the `cutoff_time` column as a datetime!\n",
     "\n",
     "```python\n",
     "readings_path = 'path/to/your/data'\n",
     "target_times = pd.read_csv('path/to/your/target_times.csv', parse_dates=['cutoff_time'])\n",
     "```"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "from greenguard.loaders import CSVLoader\n",
    "\n",
    "readings_path = 'readings'\n",
    "\n",
    "csv_loader = CSVLoader(readings_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "Once we have created our instance, we can load the readings needed for our target times\n",
-    "calling the `load` method with two arguments:\n",
+    "Once we have created our instance, we can load the readings needed for our target times by\n",
+    "calling the `load` method with the following two arguments:\n",
     "\n",
     "* `target_times (pandas.DataFrame)`: the `target_times` table.\n",
     "* `window_size (str)`: the size of the training window, as a timedelta specification\n",
     "    (amount + time unit). This indicates the minimum amount of data that we need to\n",
-    "    load for each training from the `target_times` table.\n",
+    "    load for each training example from the `target_times` table.\n",
     "    \n",
     "For example, let's load the readings needed for all our `target_times`, using a\n",
-    "`window_size` of one day."
+    "`window_size` of **one day**.\n",
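+    "\n",
+    "Note that any time-delta string that pandas can parse should be accepted as the\n",
+    "`window_size`. As a sketch of equivalent specifications (assuming standard pandas\n",
+    "parsing):\n",
+    "\n",
+    "```python\n",
+    "import pandas as pd\n",
+    "\n",
+    "# '1d', '24h' and '1440min' all denote the same window length\n",
+    "pd.to_timedelta('1d') == pd.to_timedelta('24h') == pd.to_timedelta('1440min')  # True\n",
+    "```"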
] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 10, "metadata": { "scrolled": false }, @@ -459,9 +453,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "2020-02-10 19:03:18,638 - INFO - csv - Loaded 1298564 readings from turbine T001\n", - "2020-02-10 19:03:18,763 - INFO - csv - Loaded 1298564 turbine readings\n", - "2020-02-10 19:03:19,115 - INFO - targets - Dropped 2 invalid targets\n" + "2020-02-14 09:42:33,976 - INFO - csv - Loaded 1306052 readings from turbine T001\n", + "2020-02-14 09:42:34,006 - INFO - csv - Loaded 1306052 turbine readings\n", + "2020-02-14 09:42:34,268 - INFO - targets - Dropped 2 invalid targets\n" ] } ], @@ -471,16 +465,16 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(1298564, 4)" + "(1306052, 4)" ] }, - "execution_count": 18, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -491,7 +485,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -526,36 +520,36 @@ " 0\n", " T001\n", " S01\n", - " 2013-01-12\n", - " 294.0\n", + " 2013-01-11\n", + " 209.0\n", " \n", " \n", " 1\n", " T001\n", " S02\n", - " 2013-01-12\n", - " 310.0\n", + " 2013-01-11\n", + " 193.0\n", " \n", " \n", " 2\n", " T001\n", " S03\n", - " 2013-01-12\n", - " 306.0\n", + " 2013-01-11\n", + " 177.0\n", " \n", " \n", " 3\n", " T001\n", " S04\n", - " 2013-01-12\n", - " 303.0\n", + " 2013-01-11\n", + " 188.0\n", " \n", " \n", " 4\n", " T001\n", " S05\n", - " 2013-01-12\n", - " 265.0\n", + " 2013-01-11\n", + " 150.0\n", " \n", " \n", "\n", @@ -563,14 +557,14 @@ ], "text/plain": [ " turbine_id signal_id timestamp value\n", - "0 T001 S01 2013-01-12 294.0\n", - "1 T001 S02 2013-01-12 310.0\n", - "2 T001 S03 2013-01-12 306.0\n", - "3 T001 S04 2013-01-12 303.0\n", - "4 T001 S05 2013-01-12 265.0" + "0 T001 S01 2013-01-11 209.0\n", + "1 T001 S02 2013-01-11 193.0\n", + "2 T001 S03 2013-01-11 177.0\n", + "3 T001 S04 2013-01-11 188.0\n", + "4 T001 S05 2013-01-11 150.0" ] }, - "execution_count": 19, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -581,7 +575,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -594,7 +588,7 @@ "dtype: object" ] }, - "execution_count": 20, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -612,23 +606,23 @@ "\n", "* `turbine_id`: Unique identifier of the turbine which this reading comes from.\n", "* `signal_id`: Unique identifier of the signal which this reading comes from.\n", - "* `timestamp (datetime)`: Time where the reading took place, as a datetime.\n", - "* `value (float)`: Numeric value of this reading." + "* `timestamp (datetime)`: Time at which the reading took place, as a datetime.\n", + "* `value (float)`: Numerical value of this reading." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "We can also see how there is a message that indicates that there are 2 invalid targets\n", - "that have been dropped. This is because within our readings there was not enough\n", - "data to cover the entire trainin window for them, so they cannot be included in the\n", - "final problem specification." + "We can also see in the logged output above that there is a message that indicates that there\n", + "are 2 invalid targets that have been dropped. 
This is because within our readings there was not\n",
+    "enough data to cover the entire training window for them, and they have been discarded to ensure\n",
+    "that there is no missing data in our problem data."
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(351, 3)"
      ]
     },
-     "execution_count": 11,
+     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "target_times.shape"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "Let's see what happens if we increase the `window_size` to, for example, 30 days."
+    "Let's see what happens if we increase the `window_size` to, for example, **30 days**."
  ]
 },
 {
  "cell_type": "code",
-   "execution_count": 22,
+   "execution_count": 15,
  "metadata": {},
  "outputs": [
   {
    "name": "stderr",
    "output_type": "stream",
    "text": [
-     "2020-02-10 19:08:21,859 - INFO - csv - Loaded 1302308 readings from turbine T001\n",
-     "2020-02-10 19:08:21,955 - INFO - csv - Loaded 1302308 turbine readings\n",
-     "2020-02-10 19:08:22,298 - INFO - targets - Dropped 28 invalid targets\n"
+     "2020-02-14 09:42:54,273 - INFO - csv - Loaded 1306052 readings from turbine T001\n",
+     "2020-02-14 09:42:54,309 - INFO - csv - Loaded 1306052 turbine readings\n",
+     "2020-02-14 09:42:54,535 - INFO - targets - Dropped 29 invalid targets\n"
    ]
   }
  ],
  "source": [
   "target_times, readings = csv_loader.load(target_times, '30d')"
  ]
 },
 {
  "cell_type": "markdown",
  "metadata": {},
  "source": [
-    "We can see that now more targets needed to be dropped, because there was enough data\n",
-    "for them."
+    "We can see that now more targets were dropped, because there was not enough data for them."
  ]
 },
 {
  "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 16,
  "metadata": {},
  "outputs": [
   {
    "data": {
     "text/plain": [
-       "(321, 3)"
+       "(322, 3)"
     ]
    },
-     "execution_count": 26,
+     "execution_count": 16,
    "metadata": {},
    "output_type": "execute_result"
   }
  ],
  "source": [
   "target_times.shape"
  ]
 },
 {
  "cell_type": "markdown",
  "metadata": {},
  "source": [
-    "On the other side, we can see how now the size of the loaded readings table\n",
-    "is a bit bigger, as more data had to be included to properly cover all the\n",
-    "training windows."
+    "On the other hand, we can see how now the size of the loaded readings table has increased,\n",
+    "as more data had to be included to properly cover all the training windows."
  ]
 },
 {
  "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 17,
  "metadata": {},
  "outputs": [
   {
    "data": {
     "text/plain": [
-       "(1302308, 4)"
+       "(1306052, 4)"
     ]
    },
-     "execution_count": 27,
+     "execution_count": 17,
    "metadata": {},
    "output_type": "execute_result"
   }
  ],
  "source": [
   "readings.shape"
  ]
 },
 {
  "cell_type": "markdown",
  "metadata": {},
  "source": [
   "## 3. Preprocessing the data\n",
   "\n",
-    "In some cases, if the amount of targets is big enough, fitting high frequency data\n",
+    "In some cases, if the amount of targets is big enough, loading high frequency data\n",
   "into memory will still be a challenge.\n",
   "\n",
-    "For this cases, the `CSVLoader` class also supports passing a resampling rule and\n",
-    "an aggregation function specification, so the data can go through a sampling\n",
-    "frequency reduction aggregation while it is loaded, reducing the amount of spaces\n",
-    "that it occupies in memory once loaded.\n",
+    "For these cases, the `CSVLoader` class also supports passing a **resampling rule** and\n",
+    "an **aggregation function** specification. In this case, the data will go through a\n",
+    "**sampling frequency reduction aggregation** while it is loaded, reducing the amount\n",
+    "of memory needed to load it.\n",
    "\n",
-    "In order to use the resampling feature, we will need to create a new instance\n",
-    "of the `CSVLoader` passing the following new arguments:\n",
+    "In order to use the resampling feature, we will need to create a new instance of the\n",
+    "`CSVLoader` passing the following new arguments:\n",
    "\n",
    "* `rule (str)`: Time-delta specification (amount+unit) of the new sampling frequency.\n",
-    "* `aggregation (str or function)`: Aggregation to apply when resampling."
+    "* `aggregation (str or function)`: Aggregation function to apply when resampling.\n",
+    "\n",
+    "For example, let's create a `CSVLoader` instance that will reduce the sampling frequency\n",
+    "to **4 hours**, computing the **mean** of all the readings within each interval."
   ]
  },
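+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Under the hood this is roughly a per-signal pandas resample. A simplified sketch of\n",
+    "the aggregation that the loader applies while reading (see `greenguard/loaders/csv.py`):\n",
+    "\n",
+    "```python\n",
+    "# Illustration only: group by signal and aggregate each 4 hour interval\n",
+    "agg = readings.groupby('signal_id').resample('4h', on='timestamp').agg('mean')\n",
+    "```"
+   ]
+  },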
  {
   "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
@@ -768,17 +763,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "2020-02-10 19:31:50,932 - INFO - csv - Loaded 1235535 readings from turbine T001\n",
-      "2020-02-10 19:31:50,938 - INFO - csv - Resampling: 4h - mean\n",
-      "2020-02-10 19:31:51,459 - INFO - csv - Loaded 52130 turbine readings\n",
-      "2020-02-10 19:31:51,689 - INFO - targets - Dropped 2 invalid targets\n"
+      "2020-02-14 09:43:13,166 - INFO - csv - Loaded 1239279 readings from turbine T001\n",
+      "2020-02-14 09:43:13,168 - INFO - csv - Resampling: 4h - mean\n",
+      "2020-02-14 09:43:13,443 - INFO - csv - Loaded 52286 turbine readings\n",
+      "2020-02-14 09:43:13,586 - INFO - targets - Dropped 2 invalid targets\n"
     ]
    }
   ],
   "source": [
    "target_times, readings = csv_loader.load(target_times, '14d')"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can see now how the size of the readings table has been drastically reduced."
+   ]
+  },
  {
   "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-       "(52130, 4)"
+       "(52286, 4)"
      ]
     },
-     "execution_count": 31,
+     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "readings.shape"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 32,
+   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     " 0\n",
     " T001\n",
     " S01\n",
     " 2013-01-27 00:00:00\n",
     " 791.333333\n",
     " \n",
     " \n",
     " 1\n",
     " T001\n",
     " S01\n",
     " 2013-01-27 04:00:00\n",
     " 746.750000\n",
     " \n",
     " \n",
     " 2\n",
     " T001\n",
     " S01\n",
     " 2013-01-27 08:00:00\n",
     " 808.750000\n",
     " \n",
     " \n",
     " 3\n",
     " T001\n",
     " S01\n",
     " 2013-01-27 12:00:00\n",
     " 760.125000\n",
     " \n",
     " \n",
     " 4\n",
     " T001\n",
     " S01\n",
     " 2013-01-27 16:00:00\n",
     " 720.833333\n",
     " \n",
     " \n",
     "\n",
     ""
    ],
    "text/plain": [
     " turbine_id signal_id timestamp value\n",
     "0 T001 S01 2013-01-27 00:00:00 791.333333\n",
     "1 T001 S01 2013-01-27 04:00:00 746.750000\n",
     "2 T001 S01 2013-01-27 08:00:00 808.750000\n",
     "3 T001 S01 2013-01-27 12:00:00 760.125000\n",
     "4 T001 S01 2013-01-27 16:00:00 720.833333"
    ]
   },
-     "execution_count": 32,
+     "execution_count": 21,
    "metadata": {},
    "output_type": "execute_result"
   }
  ],
  "source": [
   "readings.head()"
  ]
 },
 {
  "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 22,
  "metadata": {},
  "outputs": [
   {
    "data": {
     "text/plain": [
-       "(319, 3)"
+       "(320, 3)"
     ]
    },
-     "execution_count": 33,
+     "execution_count": 22,
    "metadata": {},
    "output_type": "execute_result"
   }
  ],
  "source": [
   "target_times.shape"
  ]
 },
 {
  "cell_type": "markdown",
  "metadata": {},
  "source": [
   "## 4. 
Unstacking\n",
   "\n",
-    "Some of the pipelines included in **GreenGuard** expect a slightly different input format,\n",
+    "Some of the pipelines included in **GreenGuard** expect a slightly different input format\n",
   "where the data has been unstacked by `signal_id`, putting the values of each signal in a\n",
-    "different column instead of having all of them in a single column.\n",
+    "different column instead of having all of them in a single one.\n",
   "\n",
   "In such cases, the `CSVLoader` can also take care of the unstacking step.\n",
   "\n",
  ]
 },
 {
  "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 23,
  "metadata": {},
  "outputs": [
   {
    "name": "stderr",
    "output_type": "stream",
    "text": [
-     "2020-02-10 19:36:03,403 - INFO - csv - Loaded 1228047 readings from turbine T001\n",
-     "2020-02-10 19:36:03,411 - INFO - csv - Resampling: 4h - mean\n",
-     "2020-02-10 19:36:03,881 - INFO - csv - Loaded 1993 turbine readings\n",
-     "2020-02-10 19:36:04,165 - INFO - targets - Dropped 2 invalid targets\n"
+     "2020-02-14 09:43:33,528 - INFO - csv - Loaded 1231791 readings from turbine T001\n",
+     "2020-02-14 09:43:33,530 - INFO - csv - Resampling: 4h - mean\n",
+     "2020-02-14 09:43:33,831 - INFO - csv - Loaded 1999 turbine readings\n",
+     "2020-02-14 09:43:33,970 - INFO - targets - Dropped 2 invalid targets\n"
    ]
   }
  ],
  "source": [
   "csv_loader = CSVLoader(readings_path, rule='4h', aggregation='mean', unstack=True)\n",
   "target_times, readings = csv_loader.load(target_times, '14d')"
  ]
 },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The result is a table which has a much smaller number of rows, but one column for each signal."
+   ]
+  },
 {
  "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 24,
  "metadata": {},
  "outputs": [
   {
    "data": {
     "text/plain": [
-       "(1993, 28)"
+       "(1999, 28)"
     ]
    },
-     "execution_count": 35,
+     "execution_count": 24,
    "metadata": {},
    "output_type": "execute_result"
   }
  ],
  "source": [
   "readings.shape"
  ]
 },
 {
  "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 25,
  "metadata": {},
  "outputs": [
   {
@@ -1180,7 +1189,7 @@
       "[5 rows x 28 columns]"
      ]
     },
-     "execution_count": 36,
+     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "readings.head()"
   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 5. Saving the readings\n",
+    "\n",
+    "In some cases we may not want to use the generated `readings` and `target_times` tables\n",
+    "right away, but rather store them for later use.\n",
+    "\n",
+    "### Using CSV\n",
+    "\n",
+    "This can be done using pandas and plain `CSV` format:\n",
+    "\n",
+    "**NOTE**: Notice the `index=False` argument. Otherwise, an extra index column will be added\n",
+    "to the CSV which would force us to modify the loading steps afterwards."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "target_times.to_csv('my_problem_target_times.csv', index=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "readings.to_csv('my_problem_readings.csv', index=False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "After this, we can easily reload the data back using pandas again.\n",
+    "\n",
+    "**NOTE**: Notice how the datetime columns need to be passed so they can be parsed!\n",
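+    "\n",
+    "Without `parse_dates`, the timestamps would come back as plain strings. A quick sketch\n",
+    "of the difference (standard pandas behaviour, nothing GreenGuard specific):\n",
+    "\n",
+    "```python\n",
+    "pd.read_csv('my_problem_readings.csv').dtypes                             # timestamp: object\n",
+    "pd.read_csv('my_problem_readings.csv', parse_dates=['timestamp']).dtypes  # timestamp: datetime64[ns]\n",
+    "```"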
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "my_target_times = pd.read_csv('my_problem_target_times.csv', parse_dates=['cutoff_time'])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "my_readings = pd.read_csv('my_problem_readings.csv', parse_dates=['timestamp'])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "However, this approach has two inconveniences:\n",
+    "* Saving and loading the data is slow\n",
+    "* The datetimes need to be explicitly parsed"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Using Pickle\n",
+    "\n",
+    "To solve the previously mentioned inconveniences we can use `pickle` instead of `CSV` format\n",
+    "to store our data.\n",
+    "\n",
+    "In order to do this we will put the two tables in a `tuple` and store them using `pickle.dump`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pickle\n",
+    "\n",
+    "with open('my_problem.plk', 'wb') as pickle_file:\n",
+    "    pickle.dump((target_times, readings), pickle_file)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "And then load it back all at once using `pickle.load`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with open('my_problem.plk', 'rb') as pickle_file:\n",
+    "    my_target_times, my_readings = pickle.load(pickle_file)"
+   ]
+  }
 ],
 "metadata": {
From 88c2bc52da5df3aef81e57bc7dc92ca718ffd136 Mon Sep 17 00:00:00 2001
From: Carles Sala
Date: Fri, 14 Feb 2020 09:53:59 -0500
Subject: [PATCH 020/171] Prepare release notes for v0.2.0

---
 HISTORY.md | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/HISTORY.md b/HISTORY.md
index dc27a8f..90dec27 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,12 +1,13 @@
 # History

-## 0.2.0 - 2020-02-11
+## 0.2.0 - 2020-02-14

-first stable release:
+First stable release:

 * efficient data loading and preprocessing
 * initial collection of dfs and lstm based pipelines
 * optimized pipeline tuning
+* documentation and tutorials

 ## 0.1.0
From f3ce70b2c7108576821c305d6ea39e78cc0262b7 Mon Sep 17 00:00:00 2001
From: Carles Sala
Date: Fri, 14 Feb 2020 09:54:04 -0500
Subject: [PATCH 021/171] =?UTF-8?q?Bump=20version:=200.2.0.dev1=20?=
 =?UTF-8?q?=E2=86=92=200.2.0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 greenguard/__init__.py | 2 +-
 setup.cfg | 2 +-
 setup.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/greenguard/__init__.py b/greenguard/__init__.py
index f677be1..0c5a354 100644
--- a/greenguard/__init__.py
+++ b/greenguard/__init__.py
@@ -4,7 +4,7 @@

 __author__ = """MIT Data To AI Lab"""
 __email__ = 'dailabmit@gmail.com'
-__version__ = '0.2.0.dev1'
+__version__ = '0.2.0'

 import os

diff --git a/setup.cfg b/setup.cfg
index ba0d3ac..e64b81c 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.2.0.dev1
+current_version = 0.2.0
 commit = True
 tag = True
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))? 
diff --git a/setup.py b/setup.py
index 611f50c..b200d20 100644
--- a/setup.py
+++ b/setup.py
@@ -104,6 +104,6 @@
     test_suite='tests',
     tests_require=tests_require,
     url='https://github.com/D3-AI/GreenGuard',
-    version='0.2.0.dev1',
+    version='0.2.0',
     zip_safe=False,
 )
From bb13a9353766803daca9f0a40eb19acd94a695a4 Mon Sep 17 00:00:00 2001
From: Carles Sala
Date: Fri, 14 Feb 2020 09:54:19 -0500
Subject: [PATCH 022/171] =?UTF-8?q?Bump=20version:=200.2.0=20=E2=86=92=200?=
 =?UTF-8?q?.2.1.dev0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 greenguard/__init__.py | 2 +-
 setup.cfg | 2 +-
 setup.py | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/greenguard/__init__.py b/greenguard/__init__.py
index 0c5a354..35fcad0 100644
--- a/greenguard/__init__.py
+++ b/greenguard/__init__.py
@@ -4,7 +4,7 @@

 __author__ = """MIT Data To AI Lab"""
 __email__ = 'dailabmit@gmail.com'
-__version__ = '0.2.0'
+__version__ = '0.2.1.dev0'

 import os

diff --git a/setup.cfg b/setup.cfg
index e64b81c..3596a0b 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.2.0
+current_version = 0.2.1.dev0
 commit = True
 tag = True
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?

diff --git a/setup.py b/setup.py
index b200d20..96b2019 100644
--- a/setup.py
+++ b/setup.py
@@ -104,6 +104,6 @@
     test_suite='tests',
     tests_require=tests_require,
     url='https://github.com/D3-AI/GreenGuard',
-    version='0.2.0',
+    version='0.2.1.dev0',
     zip_safe=False,
 )
From 18e3c59493dbf5daae54b129925d1f952fd35102 Mon Sep 17 00:00:00 2001
From: Carles Sala
Date: Wed, 19 Feb 2020 13:44:28 -0500
Subject: [PATCH 023/171] Fix datetime format

---
 greenguard/demo.py | 2 +-
 greenguard/loaders/csv.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/greenguard/demo.py b/greenguard/demo.py
index bae6a64..e15f71d 100644
--- a/greenguard/demo.py
+++ b/greenguard/demo.py
@@ -61,7 +61,7 @@ def generate_raw_readings(output_path='demo'):
         data = readings[readings.turbine_id == turbine_id]
         for month in range(1, 13):
             month_data = data[data.timestamp.dt.month == month].copy()
-            month_data['timestamp'] = month_data['timestamp'].dt.strftime('%m/%d/%y %M:%H:%S')
+            month_data['timestamp'] = month_data['timestamp'].dt.strftime('%m/%d/%y %H:%M:%S')
             month_path = os.path.join(turbine_path, '2013-{:02d}-.csv'.format(month))
             LOGGER.info('Generating file %s', month_path)
             month_data[['signal_id', 'timestamp', 'value']].to_csv(month_path, index=False)
diff --git a/greenguard/loaders/csv.py b/greenguard/loaders/csv.py
index 5fec885..b0bfae5 100644
--- a/greenguard/loaders/csv.py
+++ b/greenguard/loaders/csv.py
@@ -36,7 +36,7 @@ class CSVLoader:
             Only used when resampling. Defaults to ``False``.
     """

-    DEFAULT_DATETIME_FMT = '%m/%d/%y %M:%H:%S'
+    DEFAULT_DATETIME_FMT = '%m/%d/%y %H:%M:%S'
     DEFAULT_FILENAME_FMT = '%Y-%m-.csv'

     def __init__(self, readings_path='.', rule=None, aggregation='mean', unstack=False,
From fe662815629094e35ee2adbb9bc1043fe2d75e8f Mon Sep 17 00:00:00 2001
From: Carles Sala
Date: Thu, 20 Feb 2020 21:43:12 -0500
Subject: [PATCH 024/171] Resample once per file. 
Also improve select_valid_targets

---
 greenguard/loaders/csv.py | 30 ++++++++++++++++++------------
 greenguard/pipeline.py | 4 +++-
 greenguard/targets.py | 31 +++++++++++++++++++++++++++----
 3 files changed, 48 insertions(+), 17 deletions(-)

diff --git a/greenguard/loaders/csv.py b/greenguard/loaders/csv.py
index 5fec885..86f7b74 100644
--- a/greenguard/loaders/csv.py
+++ b/greenguard/loaders/csv.py
@@ -4,7 +4,7 @@
 import dask
 import pandas as pd

-from greenguard.targets import select_valid_targets
+from greenguard.targets import drop_duplicates, select_valid_targets

 LOGGER = logging.getLogger(__name__)

@@ -54,6 +54,12 @@ def __filter_by_signal(self, readings, signals):
             LOGGER.debug('Filtering by signal')
             readings = readings[readings.signal_id.isin(signals)]

+        try:
+            readings['value'] = readings['value'].astype(float)
+        except ValueError:
+            signals = readings[~readings['value'].str.isnumeric()].signal_id.unique()
+            raise ValueError('Signals contain non-numerical values: {}'.format(signals))
+
         LOGGER.debug('Selected %s readings by signal', len(readings))

         return readings.copy()
@@ -97,12 +103,6 @@ def __load_readings_file(self, turbine_file, timestamps, signals):
     @dask.delayed
     def __consolidate(self, readings, turbine_id):
         readings = pd.concat(readings, ignore_index=True)
-        try:
-            readings['value'] = readings['value'].astype(float)
-        except ValueError:
-            signals = readings[readings['value'].str.isnumeric()].signal_id.unique()
-            raise ValueError('Signals contain non-numerical values: {}'.format(signals))
-
         readings.insert(0, 'turbine_id', turbine_id)

         LOGGER.info('Loaded %s readings from turbine %s', len(readings), turbine_id)
@@ -127,9 +127,12 @@ def _join_names(names):
     @dask.delayed
     def __resample(self, readings):
         LOGGER.info('Resampling: %s - %s', self._rule, self._aggregation)
-        grouped = readings.groupby(['turbine_id', 'signal_id'])
+        grouped = readings.groupby('signal_id')
         dfr = grouped.resample(rule=self._rule, on='timestamp')
         agg = dfr.agg(self._aggregation)
+
+        LOGGER.info('%s readings reduced to %s', len(readings), len(agg))
+
         if self._unstack:
             agg = agg.unstack(level='signal_id').reset_index()
             agg.columns = agg.columns.map(self._join_names)
@@ -149,14 +152,15 @@ def _load_turbine(self, turbine_id, timestamps, signals=None):
             file_readings = self.__load_readings_file(filename, timestamps, signals)
             file_readings = self.__filter_by_signal(file_readings, signals)
             file_readings = self.__filter_by_timestamp(file_readings, timestamps)
+
+            if self._rule:
+                file_readings = self.__resample(file_readings)
+
             readings.append(file_readings)

         if readings:
             readings = self.__consolidate(readings, turbine_id)

-            if self._rule:
-                readings = self.__resample(readings)
-
         return readings

     @staticmethod
@@ -199,6 +203,8 @@ def load(self, target_times, window_size, signals=None, debug=False, select_vali
             target_times = pd.read_csv(target_times)
             target_times['cutoff_time'] = pd.to_datetime(target_times['cutoff_time'])

+        target_times = drop_duplicates(target_times)
+
         if isinstance(signals, pd.DataFrame):
             signals = signals.signal_id

@@ -222,7 +228,7 @@ def load(self, target_times, window_size, signals=None, debug=False, select_vali
         LOGGER.info('Loaded %s turbine readings', len(readings))

         if select_valid:
-            target_times = select_valid_targets(target_times, readings, window_size)
+            target_times = select_valid_targets(target_times, readings, window_size, self._rule)
             return target_times, readings

         return readings
diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py
index 6ac75af..5ed7ec1 100644
--- 
a/greenguard/pipeline.py
+++ b/greenguard/pipeline.py
@@ -224,6 +224,8 @@ def __init__(self, template, metric='accuracy', cost=False, init_params=None, st
         if self._preprocessing and (self._preprocessing > self._static):
             raise ValueError('Preprocessing cannot be bigger than static')

+        self.iterations = 0
+
     def __repr__(self):
         return (
             "GreenGuardPipeline({})\n"
@@ -444,7 +446,7 @@ def tune(self, target_times=None, readings=None, turbines=None, iterations=10):

         self._tuner = self._get_tuner()

-        for i in range(iterations):
+        for i in range(self.iterations, self.iterations + iterations):
             LOGGER.info('Scoring pipeline %s', i + 1)

             params = self._tuner.propose(1)
diff --git a/greenguard/targets.py b/greenguard/targets.py
index 18106b7..aef280a 100644
--- a/greenguard/targets.py
+++ b/greenguard/targets.py
@@ -119,12 +119,12 @@ def apply_function(row):
         except KeyError:
             return False

-        return times['min'] < cutoff < times['max']
+        return times['min'] <= cutoff <= times['max']

     return apply_function


-def select_valid_targets(target_times, readings, window_size):
+def select_valid_targets(target_times, readings, window_size, rule=None):
     """Filter out target_times without enough data for this window_size.

     The target_times table is scanned and checked against the readings table
@@ -138,6 +138,9 @@ def select_valid_targets(target_times, readings, window_size):
         Readings table, with at least turbine_id, signal_id, and timestamp fields.
     window_size (str or pandas.TimeDelta):
         TimeDelta specification that indicates the length of the training window.
+    rule (str or pandas.TimeDelta):
+        Resampling rule specification. If given, add that to the max timestamp
+        to ensure the period is completely covered.

     Returns:
         pandas.DataFrame:
@@ -147,9 +150,29 @@
     timestamps = readings.groupby('turbine_id').timestamp.agg(['min', 'max'])
     timestamps['min'] += pd.to_timedelta(window_size)

+    if rule is not None:
+        timestamps['max'] += pd.to_timedelta(rule)
+
     valid = target_times.apply(_valid_targets(timestamps), axis=1)
-    valid_targets = target_times[valid].copy()
+    valid_targets = target_times[valid]

-    LOGGER.info('Dropped %s invalid targets', len(target_times) - len(valid_targets))
+    length = len(valid_targets)
+    LOGGER.info('Dropped %s targets without enough data. Final target_times size: %s',
+                len(target_times) - length, length)

     return valid_targets
+
+
+def drop_duplicates(target_times):
+    length = len(target_times)
+    filtered = target_times.drop_duplicates()
+    new_length = len(filtered)
+    if length != new_length:
+        LOGGER.warning('Dropped %s duplicate targets!', length - new_length)
+
+    filtered = filtered.drop_duplicates(subset=['turbine_id', 'cutoff_time'], keep=False)
+    final_length = len(filtered)
+    if new_length != final_length:
+        LOGGER.warning('Dropped %s incoherent targets!', new_length - final_length)
+
+    return filtered.copy()
From 75e248ec0bb8114739a6dbb3a0d03588f9389701 Mon Sep 17 00:00:00 2001
From: Carles Sala
Date: Mon, 24 Feb 2020 18:32:25 -0500
Subject: [PATCH 025/171] Fix docker config and update notebooks to the latest
 changes

---
 Dockerfile | 5 +-
 docker-compose.yml | 1 +
 notebooks/1. GreenGuard Quickstart.ipynb | 176 +++++-----
 notebooks/2. 
Extract Readings.ipynb | 405 +++++++++++++---------- 4 files changed, 315 insertions(+), 272 deletions(-) diff --git a/Dockerfile b/Dockerfile index bd6411a..3aeebd1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,8 +7,11 @@ RUN adduser jupyter --uid $UID --disabled-password --system RUN mkdir /app COPY setup.py /app -RUN pip install -e /app && pip install jupyter +RUN mkdir /app/greenguard +COPY greenguard/__init__.py /app/greenguard +RUN pip install -e /app jupyter +RUN rm -r /app/greenguard COPY greenguard /app/greenguard COPY notebooks /app/notebooks diff --git a/docker-compose.yml b/docker-compose.yml index dfb7aed..a839518 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,7 @@ version: '3' services: jupyter: + network_mode: host build: context: . args: diff --git a/notebooks/1. GreenGuard Quickstart.ipynb b/notebooks/1. GreenGuard Quickstart.ipynb index a32b494..9c0e2d7 100644 --- a/notebooks/1. GreenGuard Quickstart.ipynb +++ b/notebooks/1. GreenGuard Quickstart.ipynb @@ -380,7 +380,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -405,7 +405,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -420,7 +420,7 @@ " 'resample_600s_unstack_144_lstm_timeseries_classifier']" ] }, - "execution_count": 11, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -440,7 +440,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -451,7 +451,7 @@ " 'resample_600s_unstack_dfs_1d_xgb_classifier']" ] }, - "execution_count": 12, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -470,18 +470,18 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'resample_600s_normalize_dfs_1d_xgb_classifier': '/home/xals/Projects/MIT/GreenGuard/greenguard/pipelines/resample_600s_normalize_dfs_1d_xgb_classifier.json',\n", - " 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier': '/home/xals/Projects/MIT/GreenGuard/greenguard/pipelines/resample_600s_unstack_normalize_dfs_1d_xgb_classifier.json',\n", - " 'resample_600s_unstack_dfs_1d_xgb_classifier': '/home/xals/Projects/MIT/GreenGuard/greenguard/pipelines/resample_600s_unstack_dfs_1d_xgb_classifier.json'}" + "{'resample_600s_normalize_dfs_1d_xgb_classifier': '/app/greenguard/pipelines/resample_600s_normalize_dfs_1d_xgb_classifier.json',\n", + " 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier': '/app/greenguard/pipelines/resample_600s_unstack_normalize_dfs_1d_xgb_classifier.json',\n", + " 'resample_600s_unstack_dfs_1d_xgb_classifier': '/app/greenguard/pipelines/resample_600s_unstack_dfs_1d_xgb_classifier.json'}" ] }, - "execution_count": 13, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -508,7 +508,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -537,7 +537,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -557,15 +557,15 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "2020-02-03 12:51:46,145 - INFO - pipeline - Scoring the default pipeline\n", - "2020-02-03 12:51:46,147 - INFO - pipeline - Running static 
steps before cross validation\n" + "INFO:greenguard.pipeline:Scoring the default pipeline\n", + "INFO:greenguard.pipeline:Running static steps before cross validation\n" ] }, { @@ -573,32 +573,32 @@ "output_type": "stream", "text": [ "Built 165 features\n", - "Elapsed: 00:32 | Progress: 100%|██████████\n", - "Elapsed: 00:16 | Progress: 100%|██████████\n", + "Elapsed: 00:47 | Progress: 100%|██████████\n", + "Elapsed: 00:24 | Progress: 100%|██████████\n", "Built 165 features\n", - "Elapsed: 00:33 | Progress: 100%|██████████\n", - "Elapsed: 00:15 | Progress: 100%|██████████\n", + "Elapsed: 00:50 | Progress: 100%|██████████\n", + "Elapsed: 00:23 | Progress: 100%|██████████\n", "Built 165 features\n", - "Elapsed: 00:31 | Progress: 100%|██████████\n", - "Elapsed: 00:15 | Progress: 100%|██████████\n" + "Elapsed: 00:46 | Progress: 100%|██████████\n", + "Elapsed: 00:23 | Progress: 100%|██████████\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "2020-02-03 12:54:14,195 - INFO - pipeline - Default Pipeline score: 0.605187908496732\n", - "2020-02-03 12:54:14,196 - INFO - pipeline - Scoring pipeline 1\n", - "2020-02-03 12:54:14,199 - INFO - gp - Using Uniform sampler as user specified r_minimum threshold is not met to start the GP based learning\n", - "2020-02-03 12:54:14,380 - INFO - pipeline - Pipeline 1 score: 0.5976760567286199\n", - "2020-02-03 12:54:14,385 - INFO - pipeline - Scoring pipeline 2\n", - "2020-02-03 12:54:14,888 - INFO - pipeline - Pipeline 2 score: 0.5965798320999443\n", - "2020-02-03 12:54:14,890 - INFO - pipeline - Scoring pipeline 3\n", - "2020-02-03 12:54:15,313 - INFO - pipeline - Pipeline 3 score: 0.6431783902372138\n", - "2020-02-03 12:54:15,316 - INFO - pipeline - Scoring pipeline 4\n", - "2020-02-03 12:54:15,729 - INFO - pipeline - Pipeline 4 score: 0.5642664541017163\n", - "2020-02-03 12:54:15,731 - INFO - pipeline - Scoring pipeline 5\n", - "2020-02-03 12:54:15,883 - INFO - pipeline - Pipeline 5 score: 0.5859328579916815\n" + "INFO:greenguard.pipeline:Default Pipeline score: 0.605187908496732\n", + "INFO:greenguard.pipeline:Scoring pipeline 1\n", + "INFO:btb:Using Uniform sampler as user specified r_minimum threshold is not met to start the GP based learning\n", + "INFO:greenguard.pipeline:Pipeline 1 score: 0.6188131761825791\n", + "INFO:greenguard.pipeline:Scoring pipeline 2\n", + "INFO:greenguard.pipeline:Pipeline 2 score: 0.6271095502877767\n", + "INFO:greenguard.pipeline:Scoring pipeline 3\n", + "INFO:greenguard.pipeline:Pipeline 3 score: 0.6305597783858653\n", + "INFO:greenguard.pipeline:Scoring pipeline 4\n", + "INFO:greenguard.pipeline:Pipeline 4 score: 0.6024864024864024\n", + "INFO:greenguard.pipeline:Scoring pipeline 5\n", + "INFO:greenguard.pipeline:Pipeline 5 score: 0.6141217155301661\n" ] } ], @@ -618,21 +618,21 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'mlprimitives.custom.feature_extraction.CategoricalEncoder#1': {'max_labels': 28},\n", - " 'xgboost.XGBClassifier#1': {'n_estimators': 549,\n", - " 'max_depth': 3,\n", - " 'learning_rate': 0.09499856413762053,\n", - " 'gamma': 0.48809516357182936,\n", - " 'min_child_weight': 7}}" + "{'mlprimitives.custom.feature_extraction.CategoricalEncoder#1': {'max_labels': 82},\n", + " 'xgboost.XGBClassifier#1': {'n_estimators': 785,\n", + " 'max_depth': 7,\n", + " 'learning_rate': 0.12220259756122442,\n", + " 'gamma': 0.07359343182340616,\n", + " 'min_child_weight': 9}}" ] }, - "execution_count": 17, + 
"execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -651,16 +651,16 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.6431783902372138" + "0.6305597783858653" ] }, - "execution_count": 18, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -680,33 +680,33 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "2020-02-03 12:54:15,971 - INFO - pipeline - Scoring pipeline 1\n", - "2020-02-03 12:54:16,421 - INFO - pipeline - Pipeline 1 score: 0.6220467704338674\n", - "2020-02-03 12:54:16,423 - INFO - pipeline - Scoring pipeline 2\n", - "2020-02-03 12:54:16,795 - INFO - pipeline - Pipeline 2 score: 0.5867369345630215\n", - "2020-02-03 12:54:16,797 - INFO - pipeline - Scoring pipeline 3\n", - "2020-02-03 12:54:17,227 - INFO - pipeline - Pipeline 3 score: 0.6161616161616162\n", - "2020-02-03 12:54:17,229 - INFO - pipeline - Scoring pipeline 4\n", - "2020-02-03 12:54:17,725 - INFO - pipeline - Pipeline 4 score: 0.6037324896256047\n", - "2020-02-03 12:54:17,727 - INFO - pipeline - Scoring pipeline 5\n", - "2020-02-03 12:54:18,287 - INFO - pipeline - Pipeline 5 score: 0.6169717350045217\n", - "2020-02-03 12:54:18,288 - INFO - pipeline - Scoring pipeline 6\n", - "2020-02-03 12:54:18,744 - INFO - pipeline - Pipeline 6 score: 0.639102564102564\n", - "2020-02-03 12:54:18,746 - INFO - pipeline - Scoring pipeline 7\n", - "2020-02-03 12:54:19,171 - INFO - pipeline - Pipeline 7 score: 0.6724889262202695\n", - "2020-02-03 12:54:19,174 - INFO - pipeline - Scoring pipeline 8\n", - "2020-02-03 12:54:19,627 - INFO - pipeline - Pipeline 8 score: 0.628250663400694\n", - "2020-02-03 12:54:19,629 - INFO - pipeline - Scoring pipeline 9\n", - "2020-02-03 12:54:20,250 - INFO - pipeline - Pipeline 9 score: 0.656191724941725\n", - "2020-02-03 12:54:20,253 - INFO - pipeline - Scoring pipeline 10\n", - "2020-02-03 12:54:20,799 - INFO - pipeline - Pipeline 10 score: 0.639014073371284\n" + "INFO:greenguard.pipeline:Scoring pipeline 1\n", + "INFO:greenguard.pipeline:Pipeline 1 score: 0.6635006784260514\n", + "INFO:greenguard.pipeline:Scoring pipeline 2\n", + "INFO:greenguard.pipeline:Pipeline 2 score: 0.6845139382452815\n", + "INFO:greenguard.pipeline:Scoring pipeline 3\n", + "INFO:greenguard.pipeline:Pipeline 3 score: 0.6424425247954658\n", + "INFO:greenguard.pipeline:Scoring pipeline 4\n", + "INFO:greenguard.pipeline:Pipeline 4 score: 0.6146558553876801\n", + "INFO:greenguard.pipeline:Scoring pipeline 5\n", + "INFO:greenguard.pipeline:Pipeline 5 score: 0.6188226349516671\n", + "INFO:greenguard.pipeline:Scoring pipeline 6\n", + "INFO:greenguard.pipeline:Pipeline 6 score: 0.6213326748609891\n", + "INFO:greenguard.pipeline:Scoring pipeline 7\n", + "INFO:greenguard.pipeline:Pipeline 7 score: 0.6431577681577682\n", + "INFO:greenguard.pipeline:Scoring pipeline 8\n", + "INFO:greenguard.pipeline:Pipeline 8 score: 0.6119918008302174\n", + "INFO:greenguard.pipeline:Scoring pipeline 9\n", + "INFO:greenguard.pipeline:Pipeline 9 score: 0.670814479638009\n", + "INFO:greenguard.pipeline:Scoring pipeline 10\n", + "INFO:greenguard.pipeline:Pipeline 10 score: 0.6781385082782808\n" ] } ], @@ -716,16 +716,16 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.6724889262202695" + "0.6845139382452815" ] }, - 
"execution_count": 20, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -736,21 +736,21 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'mlprimitives.custom.feature_extraction.CategoricalEncoder#1': {'max_labels': 35},\n", - " 'xgboost.XGBClassifier#1': {'n_estimators': 542,\n", - " 'max_depth': 9,\n", - " 'learning_rate': 0.8024814826871371,\n", - " 'gamma': 0.8891378840299992,\n", + "{'mlprimitives.custom.feature_extraction.CategoricalEncoder#1': {'max_labels': 84},\n", + " 'xgboost.XGBClassifier#1': {'n_estimators': 788,\n", + " 'max_depth': 4,\n", + " 'learning_rate': 0.13866846579555614,\n", + " 'gamma': 0.652732260680545,\n", " 'min_child_weight': 10}}" ] }, - "execution_count": 21, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -774,7 +774,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -782,7 +782,7 @@ "output_type": "stream", "text": [ "Built 165 features\n", - "Elapsed: 00:35 | Progress: 100%|██████████\n" + "Elapsed: 00:52 | Progress: 100%|██████████\n" ] } ], @@ -801,14 +801,14 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Elapsed: 00:11 | Progress: 100%|██████████\n" + "Elapsed: 00:17 | Progress: 100%|██████████\n" ] } ], @@ -825,16 +825,16 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.7058823529411765" + "0.76" ] }, - "execution_count": 24, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -863,7 +863,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -882,7 +882,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ @@ -898,14 +898,14 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Elapsed: 00:11 | Progress: 100%|██████████\n" + "Elapsed: 00:17 | Progress: 100%|██████████\n" ] }, { @@ -914,7 +914,7 @@ "array([0, 0, 0, 1, 0])" ] }, - "execution_count": 27, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -941,7 +941,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.6.8" } }, "nbformat": 4, diff --git a/notebooks/2. Extract Readings.ipynb b/notebooks/2. Extract Readings.ipynb index 8379817..f8166a0 100644 --- a/notebooks/2. Extract Readings.ipynb +++ b/notebooks/2. 
Extract Readings.ipynb @@ -70,18 +70,18 @@ "name": "stderr", "output_type": "stream", "text": [ - "2020-02-14 09:42:07,018 - INFO - demo - Generating file readings/T001/2013-01-.csv\n", - "2020-02-14 09:42:07,574 - INFO - demo - Generating file readings/T001/2013-02-.csv\n", - "2020-02-14 09:42:08,123 - INFO - demo - Generating file readings/T001/2013-03-.csv\n", - "2020-02-14 09:42:08,668 - INFO - demo - Generating file readings/T001/2013-04-.csv\n", - "2020-02-14 09:42:09,231 - INFO - demo - Generating file readings/T001/2013-05-.csv\n", - "2020-02-14 09:42:09,782 - INFO - demo - Generating file readings/T001/2013-06-.csv\n", - "2020-02-14 09:42:10,342 - INFO - demo - Generating file readings/T001/2013-07-.csv\n", - "2020-02-14 09:42:10,929 - INFO - demo - Generating file readings/T001/2013-08-.csv\n", - "2020-02-14 09:42:11,468 - INFO - demo - Generating file readings/T001/2013-09-.csv\n", - "2020-02-14 09:42:12,023 - INFO - demo - Generating file readings/T001/2013-10-.csv\n", - "2020-02-14 09:42:12,571 - INFO - demo - Generating file readings/T001/2013-11-.csv\n", - "2020-02-14 09:42:13,127 - INFO - demo - Generating file readings/T001/2013-12-.csv\n" + "INFO:greenguard.demo:Generating file readings/T001/2013-01-.csv\n", + "INFO:greenguard.demo:Generating file readings/T001/2013-02-.csv\n", + "INFO:greenguard.demo:Generating file readings/T001/2013-03-.csv\n", + "INFO:greenguard.demo:Generating file readings/T001/2013-04-.csv\n", + "INFO:greenguard.demo:Generating file readings/T001/2013-05-.csv\n", + "INFO:greenguard.demo:Generating file readings/T001/2013-06-.csv\n", + "INFO:greenguard.demo:Generating file readings/T001/2013-07-.csv\n", + "INFO:greenguard.demo:Generating file readings/T001/2013-08-.csv\n", + "INFO:greenguard.demo:Generating file readings/T001/2013-09-.csv\n", + "INFO:greenguard.demo:Generating file readings/T001/2013-10-.csv\n", + "INFO:greenguard.demo:Generating file readings/T001/2013-11-.csv\n", + "INFO:greenguard.demo:Generating file readings/T001/2013-12-.csv\n" ] } ], @@ -453,14 +453,14 @@ "name": "stderr", "output_type": "stream", "text": [ - "2020-02-14 09:42:33,976 - INFO - csv - Loaded 1306052 readings from turbine T001\n", - "2020-02-14 09:42:34,006 - INFO - csv - Loaded 1306052 turbine readings\n", - "2020-02-14 09:42:34,268 - INFO - targets - Dropped 2 invalid targets\n" + "INFO:greenguard.loaders.csv:Loaded 1306052 readings from turbine T001\n", + "INFO:greenguard.loaders.csv:Loaded 1306052 turbine readings\n", + "INFO:greenguard.targets:Dropped 0 targets without enough data. Final target_times size: 353\n" ] } ], "source": [ - "target_times, readings = csv_loader.load(target_times, '1d')" + "new_target_times, readings = csv_loader.load(target_times, '1d')" ] }, { @@ -610,16 +610,6 @@ "* `value (float)`: Numerical value of this reading." ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can also see in the logged output above that there is a message that indicates that there\n", - "are 2 invalid targets that have been dropped. This is because within our readings there was not\n", - "enough data to cover the entire trainin window for them, and they have been discarded to ensure\n", - "that there is no missing data in our problem data." 
- ] - }, { "cell_type": "code", "execution_count": 14, @@ -628,7 +618,7 @@ { "data": { "text/plain": [ - "(351, 3)" + "(353, 3)" ] }, "execution_count": 14, @@ -637,7 +627,7 @@ } ], "source": [ - "target_times.shape" + "new_target_times.shape" ] }, { @@ -656,21 +646,24 @@ "name": "stderr", "output_type": "stream", "text": [ - "2020-02-14 09:42:54,273 - INFO - csv - Loaded 1306052 readings from turbine T001\n", - "2020-02-14 09:42:54,309 - INFO - csv - Loaded 1306052 turbine readings\n", - "2020-02-14 09:42:54,535 - INFO - targets - Dropped 29 invalid targets\n" + "INFO:greenguard.loaders.csv:Loaded 1309796 readings from turbine T001\n", + "INFO:greenguard.loaders.csv:Loaded 1309796 turbine readings\n", + "INFO:greenguard.targets:Dropped 28 targets without enough data. Final target_times size: 325\n" ] } ], "source": [ - "target_times, readings = csv_loader.load(target_times, '30d')" + "new_target_times, readings = csv_loader.load(target_times, '30d')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "We can see that now more targets were be dropped, because there was not enough data for them." + "We can see now in the logged output above that there is a message that indicates that there\n", + "were 28 invalid targets that were dropped. This is because within our readings there was not\n", + "enough data to cover the entire training window for each traning example, so the ones that were\n", + "not covered were dropped to ensure that all the training examples are valid to work with them." ] }, { @@ -681,7 +674,7 @@ { "data": { "text/plain": [ - "(322, 3)" + "(325, 3)" ] }, "execution_count": 16, @@ -690,7 +683,7 @@ } ], "source": [ - "target_times.shape" + "new_target_times.shape" ] }, { @@ -709,7 +702,7 @@ { "data": { "text/plain": [ - "(1306052, 4)" + "(1309796, 4)" ] }, "execution_count": 17, @@ -770,15 +763,38 @@ "name": "stderr", "output_type": "stream", "text": [ - "2020-02-14 09:43:13,166 - INFO - csv - Loaded 1239279 readings from turbine T001\n", - "2020-02-14 09:43:13,168 - INFO - csv - Resampling: 4h - mean\n", - "2020-02-14 09:43:13,443 - INFO - csv - Loaded 52286 turbine readings\n", - "2020-02-14 09:43:13,586 - INFO - targets - Dropped 2 invalid targets\n" + "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:81749 readings reduced to 3432\n", + "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:103319 readings reduced to 4368\n", + "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:110938 readings reduced to 4680\n", + "INFO:greenguard.loaders.csv:115647 readings reduced to 4836\n", + "INFO:greenguard.loaders.csv:115979 readings reduced to 4836\n", + "INFO:greenguard.loaders.csv:111862 readings reduced to 4680\n", + "INFO:greenguard.loaders.csv:114477 readings reduced to 4836\n", + "INFO:greenguard.loaders.csv:105321 readings reduced to 4550\n", + "INFO:greenguard.loaders.csv:115615 readings reduced to 4836\n", + "INFO:greenguard.loaders.csv:114400 readings 
reduced to 4836\n", + "INFO:greenguard.loaders.csv:108371 readings reduced to 4680\n", + "INFO:greenguard.loaders.csv:112118 readings reduced to 4680\n", + "INFO:greenguard.loaders.csv:Loaded 55250 readings from turbine T001\n", + "INFO:greenguard.loaders.csv:Loaded 55250 turbine readings\n", + "INFO:greenguard.targets:Dropped 12 targets without enough data. Final target_times size: 341\n" ] } ], "source": [ - "target_times, readings = csv_loader.load(target_times, '14d')" + "new_target_times, readings = csv_loader.load(target_times, '14d')" ] }, { @@ -796,7 +812,7 @@ { "data": { "text/plain": [ - "(52286, 4)" + "(55250, 4)" ] }, "execution_count": 20, @@ -845,36 +861,36 @@ " 0\n", " T001\n", " S01\n", - " 2013-01-27 00:00:00\n", - " 791.333333\n", + " 2013-01-10 00:00:00\n", + " 253.041667\n", " \n", " \n", " 1\n", " T001\n", " S01\n", - " 2013-01-27 04:00:00\n", - " 746.750000\n", + " 2013-01-10 04:00:00\n", + " 572.083333\n", " \n", " \n", " 2\n", " T001\n", " S01\n", - " 2013-01-27 08:00:00\n", - " 808.750000\n", + " 2013-01-10 08:00:00\n", + " 688.791667\n", " \n", " \n", " 3\n", " T001\n", " S01\n", - " 2013-01-27 12:00:00\n", - " 760.125000\n", + " 2013-01-10 12:00:00\n", + " 396.333333\n", " \n", " \n", " 4\n", " T001\n", " S01\n", - " 2013-01-27 16:00:00\n", - " 720.833333\n", + " 2013-01-10 16:00:00\n", + " 390.458333\n", " \n", " \n", "\n", @@ -882,11 +898,11 @@ ], "text/plain": [ " turbine_id signal_id timestamp value\n", - "0 T001 S01 2013-01-27 00:00:00 791.333333\n", - "1 T001 S01 2013-01-27 04:00:00 746.750000\n", - "2 T001 S01 2013-01-27 08:00:00 808.750000\n", - "3 T001 S01 2013-01-27 12:00:00 760.125000\n", - "4 T001 S01 2013-01-27 16:00:00 720.833333" + "0 T001 S01 2013-01-10 00:00:00 253.041667\n", + "1 T001 S01 2013-01-10 04:00:00 572.083333\n", + "2 T001 S01 2013-01-10 08:00:00 688.791667\n", + "3 T001 S01 2013-01-10 12:00:00 396.333333\n", + "4 T001 S01 2013-01-10 16:00:00 390.458333" ] }, "execution_count": 21, @@ -906,7 +922,7 @@ { "data": { "text/plain": [ - "(320, 3)" + "(341, 3)" ] }, "execution_count": 22, @@ -915,7 +931,7 @@ } ], "source": [ - "target_times.shape" + "new_target_times.shape" ] }, { @@ -943,16 +959,39 @@ "name": "stderr", "output_type": "stream", "text": [ - "2020-02-14 09:43:33,528 - INFO - csv - Loaded 1231791 readings from turbine T001\n", - "2020-02-14 09:43:33,530 - INFO - csv - Resampling: 4h - mean\n", - "2020-02-14 09:43:33,831 - INFO - csv - Loaded 1999 turbine readings\n", - "2020-02-14 09:43:33,970 - INFO - targets - Dropped 2 invalid targets\n" + "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:81749 readings reduced to 3432\n", + "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:103319 readings reduced to 4368\n", + "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:110938 readings reduced to 4680\n", + "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:115979 readings reduced to 4836\n", + "INFO:greenguard.loaders.csv:108371 readings reduced to 4680\n", + "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:105321 readings reduced to 4550\n", + "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + 
"INFO:greenguard.loaders.csv:115615 readings reduced to 4836\n", + "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:114400 readings reduced to 4836\n", + "INFO:greenguard.loaders.csv:115647 readings reduced to 4836\n", + "INFO:greenguard.loaders.csv:112118 readings reduced to 4680\n", + "INFO:greenguard.loaders.csv:111862 readings reduced to 4680\n", + "INFO:greenguard.loaders.csv:114477 readings reduced to 4836\n", + "INFO:greenguard.loaders.csv:Loaded 2125 readings from turbine T001\n", + "INFO:greenguard.loaders.csv:Loaded 2125 turbine readings\n", + "INFO:greenguard.targets:Dropped 12 targets without enough data. Final target_times size: 341\n" ] } ], "source": [ "csv_loader = CSVLoader(readings_path, rule='4h', aggregation='mean', unstack=True)\n", - "target_times, readings = csv_loader.load(target_times, '14d')" + "new_target_times, readings = csv_loader.load(target_times, '14d')" ] }, { @@ -970,7 +1009,7 @@ { "data": { "text/plain": [ - "(1999, 28)" + "(2125, 28)" ] }, "execution_count": 24, @@ -1035,122 +1074,122 @@ " \n", " 0\n", " T001\n", - " 2013-01-28 00:00:00\n", - " 715.750000\n", - " 709.333333\n", - " 710.208333\n", - " 796.666667\n", - " 771.750000\n", - " 732.916667\n", - " 766.166667\n", - " 3.361627e+06\n", + " 2013-01-10 00:00:00\n", + " 253.041667\n", + " 268.250000\n", + " 268.041667\n", + " 297.166667\n", + " 234.666667\n", + " 261.916667\n", + " 206.791667\n", + " 3.198335e+06\n", " ...\n", - " 13.487500\n", - " 4.272212e+06\n", - " 49.041667\n", - " 49.041667\n", - " 49.041667\n", - " 49.041667\n", - " 49.041667\n", - " 49.041667\n", - " 49.041667\n", - " 336.000000\n", + " 9.079167\n", + " 3.134510e+06\n", + " 42.416667\n", + " 44.958333\n", + " 44.833333\n", + " 49.625000\n", + " 39.208333\n", + " 43.833333\n", + " 34.625\n", + " 293.166667\n", " \n", " \n", " 1\n", " T001\n", - " 2013-01-28 04:00:00\n", - " 779.416667\n", - " 777.500000\n", - " 779.666667\n", - " 824.125000\n", - " 800.083333\n", - " 765.291667\n", - " 791.958333\n", - " 3.362652e+06\n", + " 2013-01-10 04:00:00\n", + " 572.083333\n", + " 555.291667\n", + " 538.666667\n", + " 592.291667\n", + " 557.166667\n", + " 534.000000\n", + " 544.250000\n", + " 3.199514e+06\n", " ...\n", - " 14.695833\n", - " 4.279238e+06\n", - " 43.875000\n", - " 43.875000\n", - " 43.875000\n", - " 43.875000\n", - " 43.916667\n", - " 43.875000\n", - " 43.916667\n", - " 301.083333\n", + " 10.837500\n", + " 3.142505e+06\n", + " 62.083333\n", + " 62.500000\n", + " 63.625000\n", + " 63.541667\n", + " 61.333333\n", + " 62.541667\n", + " 54.000\n", + " 421.208333\n", " \n", " \n", " 2\n", " T001\n", - " 2013-01-28 08:00:00\n", - " 732.583333\n", - " 757.375000\n", - " 738.125000\n", - " 794.583333\n", - " 765.291667\n", - " 736.541667\n", - " 766.916667\n", - " 3.364190e+06\n", + " 2013-01-10 08:00:00\n", + " 688.791667\n", + " 696.791667\n", + " 706.625000\n", + " 750.791667\n", + " 714.250000\n", + " 683.333333\n", + " 658.166667\n", + " 3.201449e+06\n", " ...\n", - " 14.100000\n", - " 4.289814e+06\n", - " 81.666667\n", - " 82.375000\n", - " 82.416667\n", - " 82.875000\n", - " 82.541667\n", - " 83.250000\n", - " 81.416667\n", - " 564.041667\n", + " 12.754167\n", + " 3.155809e+06\n", + " 92.208333\n", + " 94.958333\n", + " 94.666667\n", + " 97.333333\n", + " 94.125000\n", + " 93.583333\n", + " 86.375\n", + " 638.291667\n", " \n", " \n", " 3\n", " T001\n", - " 2013-01-28 12:00:00\n", - " 743.833333\n", - " 779.083333\n", - " 
775.833333\n", - " 804.208333\n", - " 771.458333\n", - " 736.166667\n", - " 761.000000\n", - " 3.366258e+06\n", + " 2013-01-10 12:00:00\n", + " 396.333333\n", + " 418.500000\n", + " 415.791667\n", + " 438.541667\n", + " 382.250000\n", + " 364.666667\n", + " 320.333333\n", + " 3.203319e+06\n", " ...\n", - " 13.691667\n", - " 4.304198e+06\n", - " 88.250000\n", - " 90.833333\n", - " 90.875000\n", - " 91.500000\n", - " 90.166667\n", - " 90.875000\n", - " 88.916667\n", - " 616.833333\n", + " 10.916667\n", + " 3.168640e+06\n", + " 55.750000\n", + " 60.083333\n", + " 58.583333\n", + " 61.291667\n", + " 52.791667\n", + " 52.791667\n", + " 44.000\n", + " 376.125000\n", " \n", " \n", " 4\n", " T001\n", - " 2013-01-28 16:00:00\n", - " 640.416667\n", - " 678.000000\n", - " 675.958333\n", - " 709.166667\n", - " 675.833333\n", - " 670.666667\n", - " 682.166667\n", - " 3.368310e+06\n", + " 2013-01-10 16:00:00\n", + " 390.458333\n", + " 408.875000\n", + " 409.500000\n", + " 458.000000\n", + " 415.583333\n", + " 363.000000\n", + " 364.458333\n", + " 3.204504e+06\n", " ...\n", - " 12.454167\n", - " 4.318658e+06\n", - " 80.458333\n", - " 83.541667\n", - " 85.333333\n", - " 85.916667\n", - " 83.500000\n", - " 86.375000\n", - " 83.333333\n", - " 574.958333\n", + " 10.412500\n", + " 3.176672e+06\n", + " 49.958333\n", + " 53.875000\n", + " 54.458333\n", + " 56.750000\n", + " 52.708333\n", + " 46.708333\n", + " 47.625\n", + " 354.750000\n", " \n", " \n", "\n", @@ -1159,32 +1198,32 @@ ], "text/plain": [ " turbine_id timestamp value_S01 value_S02 value_S03 \\\n", - "0 T001 2013-01-28 00:00:00 715.750000 709.333333 710.208333 \n", - "1 T001 2013-01-28 04:00:00 779.416667 777.500000 779.666667 \n", - "2 T001 2013-01-28 08:00:00 732.583333 757.375000 738.125000 \n", - "3 T001 2013-01-28 12:00:00 743.833333 779.083333 775.833333 \n", - "4 T001 2013-01-28 16:00:00 640.416667 678.000000 675.958333 \n", + "0 T001 2013-01-10 00:00:00 253.041667 268.250000 268.041667 \n", + "1 T001 2013-01-10 04:00:00 572.083333 555.291667 538.666667 \n", + "2 T001 2013-01-10 08:00:00 688.791667 696.791667 706.625000 \n", + "3 T001 2013-01-10 12:00:00 396.333333 418.500000 415.791667 \n", + "4 T001 2013-01-10 16:00:00 390.458333 408.875000 409.500000 \n", "\n", " value_S04 value_S05 value_S06 value_S07 value_S08 ... \\\n", - "0 796.666667 771.750000 732.916667 766.166667 3.361627e+06 ... \n", - "1 824.125000 800.083333 765.291667 791.958333 3.362652e+06 ... \n", - "2 794.583333 765.291667 736.541667 766.916667 3.364190e+06 ... \n", - "3 804.208333 771.458333 736.166667 761.000000 3.366258e+06 ... \n", - "4 709.166667 675.833333 670.666667 682.166667 3.368310e+06 ... \n", + "0 297.166667 234.666667 261.916667 206.791667 3.198335e+06 ... \n", + "1 592.291667 557.166667 534.000000 544.250000 3.199514e+06 ... \n", + "2 750.791667 714.250000 683.333333 658.166667 3.201449e+06 ... \n", + "3 438.541667 382.250000 364.666667 320.333333 3.203319e+06 ... \n", + "4 458.000000 415.583333 363.000000 364.458333 3.204504e+06 ... 
\n", "\n", " value_S17 value_S18 value_S19 value_S20 value_S21 value_S22 \\\n", - "0 13.487500 4.272212e+06 49.041667 49.041667 49.041667 49.041667 \n", - "1 14.695833 4.279238e+06 43.875000 43.875000 43.875000 43.875000 \n", - "2 14.100000 4.289814e+06 81.666667 82.375000 82.416667 82.875000 \n", - "3 13.691667 4.304198e+06 88.250000 90.833333 90.875000 91.500000 \n", - "4 12.454167 4.318658e+06 80.458333 83.541667 85.333333 85.916667 \n", + "0 9.079167 3.134510e+06 42.416667 44.958333 44.833333 49.625000 \n", + "1 10.837500 3.142505e+06 62.083333 62.500000 63.625000 63.541667 \n", + "2 12.754167 3.155809e+06 92.208333 94.958333 94.666667 97.333333 \n", + "3 10.916667 3.168640e+06 55.750000 60.083333 58.583333 61.291667 \n", + "4 10.412500 3.176672e+06 49.958333 53.875000 54.458333 56.750000 \n", "\n", " value_S23 value_S24 value_S25 value_S26 \n", - "0 49.041667 49.041667 49.041667 336.000000 \n", - "1 43.916667 43.875000 43.916667 301.083333 \n", - "2 82.541667 83.250000 81.416667 564.041667 \n", - "3 90.166667 90.875000 88.916667 616.833333 \n", - "4 83.500000 86.375000 83.333333 574.958333 \n", + "0 39.208333 43.833333 34.625 293.166667 \n", + "1 61.333333 62.541667 54.000 421.208333 \n", + "2 94.125000 93.583333 86.375 638.291667 \n", + "3 52.791667 52.791667 44.000 376.125000 \n", + "4 52.708333 46.708333 47.625 354.750000 \n", "\n", "[5 rows x 28 columns]" ] @@ -1221,7 +1260,7 @@ "metadata": {}, "outputs": [], "source": [ - "target_times.to_csv('my_problem_target_times.csv', index=False)" + "new_target_times.to_csv('my_problem_target_times.csv', index=False)" ] }, { @@ -1290,7 +1329,7 @@ "import pickle\n", "\n", "with open('my_problem.plk', 'wb') as pickle_file:\n", - " pickle.dump((target_times, readings), pickle_file)" + " pickle.dump((new_target_times, readings), pickle_file)" ] }, { @@ -1327,7 +1366,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.6.8" } }, "nbformat": 4, From 4dbbc33a03b591e6fdc88b636dbb7c6a6300c136 Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Tue, 25 Feb 2020 00:28:30 -0500 Subject: [PATCH 026/171] Fix docker config for windows and mac --- Dockerfile | 11 ++--------- docker-compose.yml | 1 - 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/Dockerfile b/Dockerfile index 3aeebd1..0917f4c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -3,18 +3,11 @@ FROM python:3.6 ARG UID=1000 EXPOSE 8888 -RUN adduser jupyter --uid $UID --disabled-password --system - RUN mkdir /app COPY setup.py /app -RUN mkdir /app/greenguard -COPY greenguard/__init__.py /app/greenguard -RUN pip install -e /app jupyter - -RUN rm -r /app/greenguard COPY greenguard /app/greenguard COPY notebooks /app/notebooks +RUN pip install -e /app jupyter WORKDIR /app -USER jupyter -CMD /usr/local/bin/jupyter notebook --ip 0.0.0.0 --NotebookApp.token='' +CMD pip install -e /app && /usr/local/bin/jupyter notebook --ip 0.0.0.0 --NotebookApp.token='' --allow-root diff --git a/docker-compose.yml b/docker-compose.yml index a839518..dfb7aed 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,7 +1,6 @@ version: '3' services: jupyter: - network_mode: host build: context: . 
args: From 10dcfebde5ce41c6d4c9f294ad4e59a8ac59f42e Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Tue, 25 Feb 2020 00:49:50 -0500 Subject: [PATCH 027/171] Update MANIFEST to include pipelines --- MANIFEST.in | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MANIFEST.in b/MANIFEST.in index 469520f..4ebe1c6 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -4,6 +4,8 @@ include HISTORY.md include LICENSE include README.md +recursive-include greenguard *.json + recursive-include tests * recursive-exclude * __pycache__ recursive-exclude * *.py[co] From b2b6784d4b25b028bc340ee0189a2e1d1333f2d3 Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Mon, 18 May 2020 20:08:01 +0200 Subject: [PATCH 028/171] Update links --- README.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index aecf61c..bef0983 100644 --- a/README.md +++ b/README.md @@ -13,17 +13,17 @@ AutoML for Renewable Energy Industries. [![PyPI Shield](https://img.shields.io/pypi/v/greenguard.svg)](https://pypi.python.org/pypi/greenguard) -[![Travis CI Shield](https://travis-ci.org/D3-AI/GreenGuard.svg?branch=master)](https://travis-ci.org/D3-AI/GreenGuard) +[![Travis CI Shield](https://travis-ci.org/signals-dev/GreenGuard.svg?branch=master)](https://travis-ci.org/signals-dev/GreenGuard) [![Downloads](https://pepy.tech/badge/greenguard)](https://pepy.tech/project/greenguard) # GreenGuard -- License: [MIT](https://github.com/D3-AI/GreenGuard/blob/master/LICENSE) -- Documentation: https://D3-AI.github.io/GreenGuard -- Homepage: https://github.com/D3-AI/GreenGuard +- License: [MIT](https://github.com/signals-dev/GreenGuard/blob/master/LICENSE) +- Documentation: https://signals-dev.github.io/GreenGuard +- Homepage: https://github.com/signals-dev/GreenGuard # Overview @@ -78,7 +78,7 @@ https://docs.docker.com/compose/install/) installed on your system and then foll 1. Clone this repository and go into the `GreenGuard` folder: ```bash -git clone git@github.com:D3-AI/GreenGuard.git +git clone git@github.com:signals-dev/GreenGuard.git cd GreenGuard ``` @@ -298,6 +298,6 @@ f1_score(test_targets, predictions) ## What's next? For more details about **GreenGuard** and all its possibilities and features, please check the -[project documentation site](https://D3-AI.github.io/GreenGuard/) +[project documentation site](https://signals-dev.github.io/GreenGuard/) Also do not forget to have a look at the [notebook tutorials]( -https://github.com/D3-AI/GreenGuard/tree/master/notebooks)! +https://github.com/signals-dev/GreenGuard/tree/master/notebooks)! From 47981a4efb213970b7e75602bdf936cd558e5870 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Tue, 19 May 2020 14:35:46 +0200 Subject: [PATCH 029/171] Integrate BTBSession with greenguard. 
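
A rough sketch of the tuning flow that this change enables, where `tunables`
is the dict built by GreenGuardPipeline._get_tunables and `scorer` is the
callable returned by _make_btb_scorer in the diff below (illustration only,
not part of the diff itself):

```python
from btb import BTBSession

# BTBSession both selects among the candidate templates and tunes the
# hyperparameters of each one, repeatedly calling the scorer with a
# template name and a proposed hyperparameter configuration.
session = BTBSession(tunables, scorer, maximize=True)
best_proposal = session.run(10)  # run 10 tuning iterations
# best_proposal holds the winning template name, config and score
```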
--- greenguard/pipeline.py | 186 +++++++++++++++++++---------------------- setup.py | 3 +- 2 files changed, 90 insertions(+), 99 deletions(-) diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py index 5ed7ec1..a2119e6 100644 --- a/greenguard/pipeline.py +++ b/greenguard/pipeline.py @@ -1,14 +1,15 @@ # -*- coding: utf-8 -*- +import json import logging import os -from collections import defaultdict from copy import deepcopy +from hashlib import md5 import cloudpickle import numpy as np -from btb import HyperParameter -from btb.tuning import GP +from btb import BTBSession +from btb.tuning import Tunable from mlblocks import MLPipeline from mlblocks.discovery import load_pipeline from sklearn.exceptions import NotFittedError @@ -157,7 +158,8 @@ def _count_static_steps(self): return 0 def _build_pipeline(self): - self._pipeline = MLPipeline(self.template) + self._pipeline = MLPipeline(self.template_name) + if self._hyperparameters: self._pipeline.set_hyperparameters(self._hyperparameters) @@ -184,6 +186,25 @@ def set_init_params(self, init_params): self._update_params(template_params, init_params) self._build_pipeline() + @staticmethod + def _get_templates(template): + if not isinstance(template, list): + templates = [template] + else: + templates = template + + templates_dict = dict() + for template in templates: + if isinstance(template, str): + template_name = template + template = load_pipeline(template_name) + else: + template_name = md5(json.dumps(template)).digest() + + templates_dict[template_name] = template + + return templates_dict + def __init__(self, template, metric='accuracy', cost=False, init_params=None, stratify=True, cv_splits=5, shuffle=True, random_state=0, preprocessing=0): @@ -194,12 +215,12 @@ def __init__(self, template, metric='accuracy', cost=False, init_params=None, st self._metric = metric self._cost = cost + self.cv_score = np.inf if cost else -np.inf + self._splits = dict() - if isinstance(template, str): - self.template_name = template - self.template = load_pipeline(template) - else: - self.template = template + self.templates = self._get_templates(template) + self.template_name = list(self.templates.keys())[0] + self.template = self.templates[self.template_name] # Make sure to have block number in all init_params names template_params = self.template.setdefault('init_params', dict()) @@ -270,9 +291,14 @@ def _is_better(self, score): return score > self.cv_score - def _generate_splits(self, X, y, readings, turbines=None): + def _generate_splits(self, template_name, target_times, readings, turbines=None): + template = self.templates.get(template_name) + + X = target_times[['turbine_id', 'cutoff_time']] + y = target_times['target'] + if self._preprocessing: - pipeline = MLPipeline(self.template) + pipeline = MLPipeline(template) LOGGER.debug('Running %s preprocessing steps', self._preprocessing) context = pipeline.fit(X=X, y=y, readings=readings, turbines=turbines, output_=self._preprocessing - 1) @@ -290,7 +316,7 @@ def _generate_splits(self, X, y, readings, turbines=None): X_train, X_test = X.iloc[train_index], X.iloc[test_index] y_train, y_test = y.iloc[train_index], y.iloc[test_index] - pipeline = MLPipeline(self.template) + pipeline = MLPipeline(template) fit = pipeline.fit(X_train, y_train, output_=self._static - 1, start_=self._preprocessing, **context) predict = pipeline.predict(X_test, output_=self._static - 1, @@ -300,7 +326,7 @@ def _generate_splits(self, X, y, readings, turbines=None): return splits - def cross_validate(self, X=None, y=None, 
readings=None, turbines=None, params=None): + def cross_validate(self, template_splits=None, params=None): """Compute cross validation score using the given data. If the splits have not been previously computed, compute them now. @@ -332,19 +358,17 @@ def cross_validate(self, X=None, y=None, readings=None, turbines=None, params=No Computed cross validation score. This score is the average of the scores obtained accross all the cross validation folds. """ - - if self._splits is None: - LOGGER.info('Running static steps before cross validation') - self._splits = self._generate_splits(X, y, readings, turbines) - scores = [] - for fold, pipeline, fit, predict, y_test in self._splits: + if template_splits is None: + template_splits = self._splits.get(self.template_name) + + for fold, pipeline, fit, predict, y_test in template_splits: LOGGER.debug('Scoring fold %s', fold) if params: pipeline.set_hyperparameters(params) else: - pipeline.set_hyperparameters(self._pipeline.get_hyperparameters()) + pipeline.set_hyperparameters(pipeline.get_hyperparameters()) pipeline.fit(start_=self._static, **fit) predictions = pipeline.predict(start_=self._static, **predict) @@ -355,71 +379,61 @@ def cross_validate(self, X=None, y=None, readings=None, turbines=None, params=No scores.append(score) cv_score = np.mean(scores) - if self.cv_score is None: - self.cv_score = cv_score - return cv_score - def _to_dicts(self, hyperparameters): - params_tree = defaultdict(dict) - for (block, hyperparameter), value in hyperparameters.items(): - if isinstance(value, np.integer): - value = int(value) + @staticmethod + def _parse_params(param_details): + param_type = param_details['type'] + param_details['type'] = 'str' if param_type == 'string' else param_type + + if param_details['type'] == 'bool': + param_details['range'] = [True, False] + else: + param_details['range'] = param_details.get('range') or param_details.get('values') - elif isinstance(value, np.floating): - value = float(value) + if 'default' not in param_details: + param_details['default'] = param_details['range'][0] - elif isinstance(value, np.ndarray): - value = value.tolist() + return param_details - elif value == 'None': - value = None + @classmethod + def _get_tunables(cls, templates): + pipelines = {name: MLPipeline(template) for name, template in templates.items()} + tunables = {} - params_tree[block][hyperparameter] = value + for pipeline_name, pipeline in pipelines.items(): + pipeline_tunables = {} + for name, param_details in pipeline.get_tunable_hyperparameters(flat=True).items(): + pipeline_tunables[name] = cls._parse_params(param_details) - return params_tree + tunables[pipeline_name] = Tunable.from_dict(pipeline_tunables) - def _to_tuples(self, params_tree, tunable_keys): - param_tuples = defaultdict(dict) - for block_name, params in params_tree.items(): - for param, value in params.items(): - key = (block_name, param) - if key in tunable_keys: - param_tuples[key] = 'None' if value is None else value + return tunables - return param_tuples + def _make_btb_scorer(self, target_times, readings, turbines): - def _get_tunables(self): - tunables = [] - tunable_keys = [] - for block_name, params in self._pipeline.get_tunable_hyperparameters().items(): - for param_name, param_details in params.items(): - key = (block_name, param_name) - param_type = param_details['type'] - param_type = 'string' if param_type == 'str' else param_type + def scorer(template_name, config): - if param_type == 'bool': - param_range = [True, False] - else: - param_range = 
param_details.get('range') or param_details.get('values') + template_splits = self._splits.get(template_name) + if not template_splits: + template_splits = self._generate_splits( + template_name, target_times, readings, turbines) - value = HyperParameter(param_type, param_range) - tunables.append((key, value)) - tunable_keys.append(key) + self._splits[template_name] = template_splits - return tunables, tunable_keys + score = self.cross_validate(template_splits, config) - def _get_tuner(self): - tunables, tunable_keys = self._get_tunables() - tuner = GP(tunables) + if self._is_better(score): + self.cv_score = score + self.template_name = template_name + self._hyperparameters = deepcopy(config) + self._build_pipeline() - # Inform the tuner about the score that the default hyperparmeters obtained - param_tuples = self._to_tuples(self._pipeline.get_hyperparameters(), tunable_keys) - tuner.add(param_tuples, self.cv_score) + return score - return tuner + return scorer - def tune(self, target_times=None, readings=None, turbines=None, iterations=10): + def tune(self, target_times, readings, turbines=None, iterations=10): """Tune this pipeline for the indicated number of iterations. Args: @@ -436,37 +450,13 @@ def tune(self, target_times=None, readings=None, turbines=None, iterations=10): iterations (int): Number of iterations to perform. """ - if not self._tuner: - LOGGER.info('Scoring the default pipeline') - X = target_times[['turbine_id', 'cutoff_time']] - y = target_times['target'] - self.cv_score = self.cross_validate(X, y, readings, turbines) - - LOGGER.info('Default Pipeline score: %s', self.cv_score) - - self._tuner = self._get_tuner() - - for i in range(self.iterations, self.iterations + iterations): - LOGGER.info('Scoring pipeline %s', i + 1) - - params = self._tuner.propose(1) - param_dicts = self._to_dicts(params) - - try: - score = self.cross_validate(params=param_dicts) - - LOGGER.info('Pipeline %s score: %s', i + 1, score) - - if self._is_better(score): - self.cv_score = score - self.set_hyperparameters(param_dicts) - - self._tuner.add(params, score) + scoring_function = self._make_btb_scorer(target_times, readings, turbines) + tunables = self._get_tunables(self.templates) + session = BTBSession(tunables, scoring_function, maximize=not self._cost) + if iterations: + session.run(iterations) - except Exception: - failed = '\n'.join('{}: {}'.format(k, v) for k, v in params.items()) - LOGGER.exception("Caught an exception scoring pipeline %s with params:\n%s", - i + 1, failed) + return session def fit(self, target_times, readings, turbines=None): """Fit this pipeline to the given data. 
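For reference, a worked sketch of the conversion that `_parse_params` and
`_get_tunables` above perform, turning the flat tunable-hyperparameter spec
of an MLPipeline into a BTB `Tunable`. The spec below is a made-up example;
the keys are `(block_name, hyperparameter)` tuples, as returned by
`get_tunable_hyperparameters(flat=True)`:

    from btb.tuning import Tunable

    spec = {
        ('xgboost.XGBClassifier#1', 'max_depth'): {
            'type': 'int', 'default': 3, 'range': [3, 10]},
        ('some.madeup.Block#1', 'normalize'): {
            'type': 'bool', 'default': True},
    }

    for name, details in spec.items():
        if details['type'] == 'bool':
            # Booleans carry no range of their own, so one is synthesized.
            details['range'] = [True, False]
        else:
            details['range'] = details.get('range') or details.get('values')
        if 'default' not in details:
            details['default'] = details['range'][0]

    tunable = Tunable.from_dict(spec)

Building one such `Tunable` per template is what lets a single BTBSession
search across several templates at once.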
diff --git a/setup.py b/setup.py index 96b2019..0389bee 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,8 @@ install_requires = [ 'mlblocks>=0.3.4,<0.4', 'mlprimitives>=0.2.4,<0.3', - 'baytune>=0.2.3,<0.3', + 'scipy>=1.0.1,<1.4.0', + 'baytune>=0.3.9,<0.4', 'numpy>=1.15.4,<1.17', 'pymongo>=3.7.2,<4', 'scikit-learn>=0.20.1,<0.21', From de21d40b2517c1a12d1ac06e0c9cdb7fb668505f Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Thu, 21 May 2020 13:10:28 +0200 Subject: [PATCH 030/171] =?UTF-8?q?Bump=20version:=200.2.1.dev0=20?= =?UTF-8?q?=E2=86=92=200.2.1.dev1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- greenguard/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/greenguard/__init__.py b/greenguard/__init__.py index 35fcad0..df69d4a 100644 --- a/greenguard/__init__.py +++ b/greenguard/__init__.py @@ -4,7 +4,7 @@ __author__ = """MIT Data To AI Lab""" __email__ = 'dailabmit@gmail.com' -__version__ = '0.2.1.dev0' +__version__ = '0.2.1.dev1' import os diff --git a/setup.cfg b/setup.cfg index 3596a0b..f6734b1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.2.1.dev0 +current_version = 0.2.1.dev1 commit = True tag = True parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? diff --git a/setup.py b/setup.py index 96b2019..85d6a2c 100644 --- a/setup.py +++ b/setup.py @@ -104,6 +104,6 @@ test_suite='tests', tests_require=tests_require, url='https://github.com/D3-AI/GreenGuard', - version='0.2.1.dev0', + version='0.2.1.dev1', zip_safe=False, ) From ffc27fef213853c0d431005c69c7b7a3c6c45279 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Mon, 1 Jun 2020 17:21:02 +0200 Subject: [PATCH 031/171] WIP: Support multiple templates for BTBSession. --- greenguard/pipeline.py | 200 +++++++++++++++++++++-------------------- 1 file changed, 103 insertions(+), 97 deletions(-) diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py index a2119e6..190997a 100644 --- a/greenguard/pipeline.py +++ b/greenguard/pipeline.py @@ -138,8 +138,9 @@ class GreenGuardPipeline(object): _cost = False _tuner = None _pipeline = None - _splits = None _static = None + _init_params = None + _preprocessing = None def _get_cv(self, stratify, cv_splits, shuffle, random_state): if stratify: @@ -149,22 +150,15 @@ def _get_cv(self, stratify, cv_splits, shuffle, random_state): return cv_class(n_splits=cv_splits, shuffle=shuffle, random_state=random_state) - def _count_static_steps(self): - tunable_hyperparams = self._pipeline.get_tunable_hyperparameters() - for index, block_name in enumerate(self._pipeline.blocks.keys()): + @staticmethod + def _count_static_steps(pipeline): + tunable_hyperparams = pipeline.get_tunable_hyperparameters() + for index, block_name in enumerate(pipeline.blocks.keys()): if tunable_hyperparams[block_name]: return index return 0 - def _build_pipeline(self): - self._pipeline = MLPipeline(self.template_name) - - if self._hyperparameters: - self._pipeline.set_hyperparameters(self._hyperparameters) - - self.fitted = False - @staticmethod def _update_params(old, new): for name, params in new.items(): @@ -175,17 +169,6 @@ def _update_params(old, new): for param, value in params.items(): block_params[param] = value - def set_init_params(self, init_params): - """Set new init params for the template and pipeline. - - Args: - init_params (dict): - New init_params to use. 
- """ - template_params = self.template['init_params'] - self._update_params(template_params, init_params) - self._build_pipeline() - @staticmethod def _get_templates(template): if not isinstance(template, list): @@ -205,6 +188,42 @@ def _get_templates(template): return templates_dict + def _get_init_params(self, template_name): + if self._init_params is None: + return {} + + elif template_name in self._init_params: + return self._init_params.get(template_name) + + return self._init_params + + def _get_preprocessing(self, template_name): + if isinstance(self._preprocessing, int): + return self._preprocessing + + if isinstance(self._preprocessing, dict): + return self._preprocessing.get(template_name) or 0 + + return 0 # by default + + def _build_pipeline(self, hyperparameters=None): + template_params = self.template.setdefault('init_params', dict()) + for name, params in list(template_params.items()): + if '#' not in name: + template_params[name + '#1'] = template_params.pop(name) + + init_params = self._get_init_params(self.template_name) + + if init_params: + self._update_params(template_params, init_params) + + self._pipeline = MLPipeline(self.template_name) + + if hyperparameters: + self._pipeline.set_hyperparameters(hyperparameters) + + self.fitted = False + def __init__(self, template, metric='accuracy', cost=False, init_params=None, stratify=True, cv_splits=5, shuffle=True, random_state=0, preprocessing=0): @@ -215,39 +234,33 @@ def __init__(self, template, metric='accuracy', cost=False, init_params=None, st self._metric = metric self._cost = cost + self._init_params = init_params + self._preprocessing = preprocessing + self.cv_score = np.inf if cost else -np.inf - self._splits = dict() self.templates = self._get_templates(template) self.template_name = list(self.templates.keys())[0] self.template = self.templates[self.template_name] - # Make sure to have block number in all init_params names - template_params = self.template.setdefault('init_params', dict()) - for name, params in list(template_params.items()): - if '#' not in name: - template_params[name + '#1'] = template_params.pop(name) - - self._hyperparameters = dict() - if init_params: - self.set_init_params(init_params) - else: - self._build_pipeline() - - self._static = self._count_static_steps() - self._preprocessing = preprocessing - - self.steps = self._pipeline.primitives.copy() - self.preprocessing = self.steps[:self._preprocessing] - self.static = self.steps[self._preprocessing:self._static] - self.tunable = self.steps[self._static:] + self._build_pipeline() - if self._preprocessing and (self._preprocessing > self._static): + _static = self._count_static_steps(self._pipeline) + _preprocessing = self._get_preprocessing(self.template_name) + if _preprocessing and (_preprocessing > _static): raise ValueError('Preprocessing cannot be bigger than static') self.iterations = 0 def __repr__(self): + steps = self._pipeline.primitives.copy() + preprocessing = self._get_preprocessing(self.template_name) + static = self._count_static_steps() + + preprocessing_steps = steps[:preprocessing] + static_steps = steps[preprocessing:static] + tunable_steps = steps[static:] + return ( "GreenGuardPipeline({})\n" " preprocessing:\n{}\n" @@ -255,9 +268,9 @@ def __repr__(self): " tunable:\n{}\n" ).format( self.template_name, - '\n'.join(' {}'.format(step) for step in self.preprocessing), - '\n'.join(' {}'.format(step) for step in self.static), - '\n'.join(' {}'.format(step) for step in self.tunable), + '\n'.join(' {}'.format(step) for step in 
preprocessing_steps), + '\n'.join(' {}'.format(step) for step in static_steps), + '\n'.join(' {}'.format(step) for step in tunable_steps), ) def get_hyperparameters(self): @@ -269,18 +282,6 @@ def get_hyperparameters(self): """ return deepcopy(self._hyperparameters) - def set_hyperparameters(self, hyperparameters): - """Set new hyperparameters for this pipeline instance. - - The template ``init_params`` remain unmodified. - - Args: - hyperparameters (dict): - New hyperparameters to use. - """ - self._update_params(self._hyperparameters, hyperparameters) - self._build_pipeline() - @staticmethod def _clone_pipeline(pipeline): return MLPipeline.from_dict(pipeline.to_dict()) @@ -297,11 +298,14 @@ def _generate_splits(self, template_name, target_times, readings, turbines=None) X = target_times[['turbine_id', 'cutoff_time']] y = target_times['target'] - if self._preprocessing: - pipeline = MLPipeline(template) - LOGGER.debug('Running %s preprocessing steps', self._preprocessing) + pipeline = MLPipeline(template) + preprocessing = self._get_preprocessing(template_name) + static = self._count_static_steps(pipeline) + + if preprocessing: + LOGGER.debug('Running %s preprocessing steps', preprocessing) context = pipeline.fit(X=X, y=y, readings=readings, - turbines=turbines, output_=self._preprocessing - 1) + turbines=turbines, output_=preprocessing - 1) del context['X'] del context['y'] else: @@ -311,22 +315,26 @@ def _generate_splits(self, template_name, target_times, readings, turbines=None) } splits = list() - for fold, (train_index, test_index) in enumerate(self._cv.split(X, y)): - LOGGER.debug('Running static steps for fold %s', fold) - X_train, X_test = X.iloc[train_index], X.iloc[test_index] - y_train, y_test = y.iloc[train_index], y.iloc[test_index] + try: + for fold, (train_index, test_index) in enumerate(self._cv.split(X, y)): + LOGGER.debug('Running static steps for fold %s', fold) + X_train, X_test = X.iloc[train_index], X.iloc[test_index] + y_train, y_test = y.iloc[train_index], y.iloc[test_index] - pipeline = MLPipeline(template) - fit = pipeline.fit(X_train, y_train, output_=self._static - 1, - start_=self._preprocessing, **context) - predict = pipeline.predict(X_test, output_=self._static - 1, - start_=self._preprocessing, **context) + pipeline = MLPipeline(template) + fit = pipeline.fit(X_train, y_train, output_=static - 1, + start_=preprocessing, **context) + predict = pipeline.predict(X_test, output_=static - 1, + start_=preprocessing, **context) - splits.append((fold, pipeline, fit, predict, y_test)) + splits.append((fold, pipeline, fit, predict, y_test, static)) + + except Exception: + LOGGER.info('Could not generate splits for %', template_name) return splits - def cross_validate(self, template_splits=None, params=None): + def cross_validate(self, template_name, template_splits, params=None): """Compute cross validation score using the given data. If the splits have not been previously computed, compute them now. @@ -359,19 +367,14 @@ def cross_validate(self, template_splits=None, params=None): of the scores obtained accross all the cross validation folds. 
""" scores = [] - if template_splits is None: - template_splits = self._splits.get(self.template_name) - for fold, pipeline, fit, predict, y_test in template_splits: + for fold, pipeline, fit, predict, y_test, static in template_splits: LOGGER.debug('Scoring fold %s', fold) - if params: - pipeline.set_hyperparameters(params) - else: - pipeline.set_hyperparameters(pipeline.get_hyperparameters()) + pipeline.set_hyperparameters(params) - pipeline.fit(start_=self._static, **fit) - predictions = pipeline.predict(start_=self._static, **predict) + pipeline.fit(start_=static, **fit) + predictions = pipeline.predict(start_=static, **predict) score = self._metric(y_test, predictions) @@ -379,6 +382,13 @@ def cross_validate(self, template_splits=None, params=None): scores.append(score) cv_score = np.mean(scores) + + if self._is_better(cv_score): + self.cv_score = cv_score + self.template_name = template_name + self._hyperparameters = deepcopy(params) + self._build_pipeline(self._hyperparameters) + return cv_score @staticmethod @@ -412,22 +422,18 @@ def _get_tunables(cls, templates): def _make_btb_scorer(self, target_times, readings, turbines): - def scorer(template_name, config): - - template_splits = self._splits.get(template_name) - if not template_splits: - template_splits = self._generate_splits( - template_name, target_times, readings, turbines) + splits = { + template_name: self._generate_splits(template_name, target_times, readings, turbines) + for template_name in list(self.templates.keys()) + } - self._splits[template_name] = template_splits - - score = self.cross_validate(template_splits, config) + def scorer(template_name, config): + template_splits = splits.get(template_name) + if template_splits: + score = self.cross_validate(template_name, template_splits, config) - if self._is_better(score): - self.cv_score = score - self.template_name = template_name - self._hyperparameters = deepcopy(config) - self._build_pipeline() + else: + return None return score From f23af78bd9567fece6ab314971205aa053e7a0ac Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Wed, 3 Jun 2020 12:46:52 +0200 Subject: [PATCH 032/171] Code review / improved aproach --- greenguard/pipeline.py | 226 ++++++++++++++++++++++------------------- 1 file changed, 122 insertions(+), 104 deletions(-) diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py index 190997a..a6d8a15 100644 --- a/greenguard/pipeline.py +++ b/greenguard/pipeline.py @@ -142,6 +142,10 @@ class GreenGuardPipeline(object): _init_params = None _preprocessing = None + @staticmethod + def _clone_pipeline(pipeline): + return MLPipeline.from_dict(pipeline.to_dict()) + def _get_cv(self, stratify, cv_splits, shuffle, random_state): if stratify: cv_class = StratifiedKFold @@ -150,14 +154,30 @@ def _get_cv(self, stratify, cv_splits, shuffle, random_state): return cv_class(n_splits=cv_splits, shuffle=shuffle, random_state=random_state) - @staticmethod - def _count_static_steps(pipeline): - tunable_hyperparams = pipeline.get_tunable_hyperparameters() - for index, block_name in enumerate(pipeline.blocks.keys()): - if tunable_hyperparams[block_name]: - return index + def _get_init_params(self, template_name): + if self._init_params is None: + return {} - return 0 + elif any(name in self._init_params for name in list(self.template_names.keys())): + return self._init_params.get(template_name) + + return self._init_params + + def _set_hyperparameters(self, new_hyperparameters): + self._hyperparameters = deepcopy(new_hyperparameters) + + def 
_set_template(self, template_name): + self.template_name = deepcopy(template_name) + self.template = self.templates[self.template_name] + + def _get_preprocessing(self, template_name): + if isinstance(self._preprocessing, int): + return self._preprocessing + + if isinstance(self._preprocessing, dict): + return self._preprocessing.get(template_name) or 0 + + return 0 # by default @staticmethod def _update_params(old, new): @@ -169,6 +189,32 @@ def _update_params(old, new): for param, value in params.items(): block_params[param] = value + def _build_pipeline(self): + template_params = self.template.setdefault('init_params', dict()) + for name, params in list(template_params.items()): + if '#' not in name: + template_params[name + '#1'] = template_params.pop(name) + + init_params = self._get_init_params(self.template_name) + if init_params: + self._update_params(template_params, init_params) + + self._pipeline = MLPipeline(self.template) + + if self._hyperparameters: + self._pipeline.set_hyperparameters(self._hyperparameters) + + self.fitted = False + + @staticmethod + def _count_static_steps(pipeline): + tunable_hyperparams = pipeline.get_tunable_hyperparameters() + for index, block_name in enumerate(pipeline.blocks.keys()): + if tunable_hyperparams[block_name]: + return index + + return 0 + @staticmethod def _get_templates(template): if not isinstance(template, list): @@ -188,42 +234,6 @@ def _get_templates(template): return templates_dict - def _get_init_params(self, template_name): - if self._init_params is None: - return {} - - elif template_name in self._init_params: - return self._init_params.get(template_name) - - return self._init_params - - def _get_preprocessing(self, template_name): - if isinstance(self._preprocessing, int): - return self._preprocessing - - if isinstance(self._preprocessing, dict): - return self._preprocessing.get(template_name) or 0 - - return 0 # by default - - def _build_pipeline(self, hyperparameters=None): - template_params = self.template.setdefault('init_params', dict()) - for name, params in list(template_params.items()): - if '#' not in name: - template_params[name + '#1'] = template_params.pop(name) - - init_params = self._get_init_params(self.template_name) - - if init_params: - self._update_params(template_params, init_params) - - self._pipeline = MLPipeline(self.template_name) - - if hyperparameters: - self._pipeline.set_hyperparameters(hyperparameters) - - self.fitted = False - def __init__(self, template, metric='accuracy', cost=False, init_params=None, stratify=True, cv_splits=5, shuffle=True, random_state=0, preprocessing=0): @@ -240,13 +250,14 @@ def __init__(self, template, metric='accuracy', cost=False, init_params=None, st self.cv_score = np.inf if cost else -np.inf self.templates = self._get_templates(template) - self.template_name = list(self.templates.keys())[0] - self.template = self.templates[self.template_name] + self._set_template(list(self.templates.keys())[0]) + self._hyperparameters = dict() self._build_pipeline() _static = self._count_static_steps(self._pipeline) _preprocessing = self._get_preprocessing(self.template_name) + if _preprocessing and (_preprocessing > _static): raise ValueError('Preprocessing cannot be bigger than static') @@ -282,10 +293,6 @@ def get_hyperparameters(self): """ return deepcopy(self._hyperparameters) - @staticmethod - def _clone_pipeline(pipeline): - return MLPipeline.from_dict(pipeline.to_dict()) - def _is_better(self, score): if self._cost: return score < self.cv_score @@ -303,6 +310,10 @@ def 
_generate_splits(self, template_name, target_times, readings, turbines=None) static = self._count_static_steps(pipeline) if preprocessing: + + if preprocessing > static: + raise ValueError('Preprocessing cannot be bigger than static') + LOGGER.debug('Running %s preprocessing steps', preprocessing) context = pipeline.fit(X=X, y=y, readings=readings, turbines=turbines, output_=preprocessing - 1) @@ -315,26 +326,65 @@ def _generate_splits(self, template_name, target_times, readings, turbines=None) } splits = list() - try: - for fold, (train_index, test_index) in enumerate(self._cv.split(X, y)): - LOGGER.debug('Running static steps for fold %s', fold) - X_train, X_test = X.iloc[train_index], X.iloc[test_index] - y_train, y_test = y.iloc[train_index], y.iloc[test_index] - - pipeline = MLPipeline(template) - fit = pipeline.fit(X_train, y_train, output_=static - 1, - start_=preprocessing, **context) - predict = pipeline.predict(X_test, output_=static - 1, - start_=preprocessing, **context) + for fold, (train_index, test_index) in enumerate(self._cv.split(X, y)): + LOGGER.debug('Running static steps for fold %s', fold) + X_train, X_test = X.iloc[train_index], X.iloc[test_index] + y_train, y_test = y.iloc[train_index], y.iloc[test_index] - splits.append((fold, pipeline, fit, predict, y_test, static)) + pipeline = MLPipeline(template) + fit = pipeline.fit(X_train, y_train, output_=static - 1, + start_=preprocessing, **context) + predict = pipeline.predict(X_test, output_=static - 1, + start_=preprocessing, **context) - except Exception: - LOGGER.info('Could not generate splits for %', template_name) + splits.append((fold, pipeline, fit, predict, y_test, static)) return splits - def cross_validate(self, template_name, template_splits, params=None): + def _cross_validate(self, template_splits, hyperparams): + scores = [] + for fold, pipeline, fit, predict, y_test, static in template_splits: + LOGGER.debug('Scoring fold %s', fold) + + pipeline.set_hyperparameters(hyperparams) + pipeline.fit(start_=static, **fit) + predictions = pipeline.predict(start_=static, **predict) + + score = self._metric(y_test, predictions) + LOGGER.debug('Fold fold %s score: %s', fold, score) + scores.append(score) + + cv_score = np.mean(scores) + + return cv_score + + def _make_btb_scorer(self, target_times, readings, turbines): + + splits = {} + + def scorer(template_name, config): + + template_splits = splits.get(template_name) + if template_splits is None: + template_splits = self._generate_splits( + template_name, target_times, readings, turbines) + + splits[template_name] = template_splits + + cv_score = self._cross_validate(template_name, template_splits, config) + + if self._is_better(cv_score): + self.cv_score = cv_score + self._set_template(template_name) + self._set_hyperparameters(deepcopy(config)) + self._build_pipeline() + + return cv_score + + return scorer + + def cross_validate(self, target_times, readings, turbines, + template_name=None, hyperparams=None): """Compute cross validation score using the given data. If the splits have not been previously computed, compute them now. @@ -366,30 +416,17 @@ def cross_validate(self, template_name, template_splits, params=None): Computed cross validation score. This score is the average of the scores obtained accross all the cross validation folds. 
""" - scores = [] - - for fold, pipeline, fit, predict, y_test, static in template_splits: - LOGGER.debug('Scoring fold %s', fold) - - pipeline.set_hyperparameters(params) - - pipeline.fit(start_=static, **fit) - predictions = pipeline.predict(start_=static, **predict) - score = self._metric(y_test, predictions) + if not template_name: + template_name = self.template_name + if hyperparams is None: + hyperparams = self.get_hyperparams() - LOGGER.debug('Fold fold %s score: %s', fold, score) - scores.append(score) - - cv_score = np.mean(scores) + elif hyperparams is None: + hyperparams = {} - if self._is_better(cv_score): - self.cv_score = cv_score - self.template_name = template_name - self._hyperparameters = deepcopy(params) - self._build_pipeline(self._hyperparameters) - - return cv_score + template_splits = self._generate_splits(template_name, target_times, readings, turbines) + return self._cross_validate(template_splits, hyperparams) @staticmethod def _parse_params(param_details): @@ -420,25 +457,6 @@ def _get_tunables(cls, templates): return tunables - def _make_btb_scorer(self, target_times, readings, turbines): - - splits = { - template_name: self._generate_splits(template_name, target_times, readings, turbines) - for template_name in list(self.templates.keys()) - } - - def scorer(template_name, config): - template_splits = splits.get(template_name) - if template_splits: - score = self.cross_validate(template_name, template_splits, config) - - else: - return None - - return score - - return scorer - def tune(self, target_times, readings, turbines=None, iterations=10): """Tune this pipeline for the indicated number of iterations. From f83777ad369599fee330d63694f7b4a105aed9a3 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Wed, 3 Jun 2020 17:31:18 +0200 Subject: [PATCH 033/171] code improvements. --- greenguard/pipeline.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py index a6d8a15..f81ae99 100644 --- a/greenguard/pipeline.py +++ b/greenguard/pipeline.py @@ -371,7 +371,7 @@ def scorer(template_name, config): splits[template_name] = template_splits - cv_score = self._cross_validate(template_name, template_splits, config) + cv_score = self._cross_validate(template_splits, config) if self._is_better(cv_score): self.cv_score = cv_score @@ -420,7 +420,7 @@ def cross_validate(self, target_times, readings, turbines, if not template_name: template_name = self.template_name if hyperparams is None: - hyperparams = self.get_hyperparams() + hyperparams = self.get_hyperparameters() elif hyperparams is None: hyperparams = {} From 60cde7ade9fb4f0fe3336cdbcba7021ae3e0b183 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Thu, 4 Jun 2020 16:18:38 +0200 Subject: [PATCH 034/171] Update notebook --- notebooks/1. GreenGuard Quickstart.ipynb | 304 +++++++++++++++-------- 1 file changed, 204 insertions(+), 100 deletions(-) diff --git a/notebooks/1. GreenGuard Quickstart.ipynb b/notebooks/1. GreenGuard Quickstart.ipynb index 9c0e2d7..be4b2f6 100644 --- a/notebooks/1. GreenGuard Quickstart.ipynb +++ b/notebooks/1. 
GreenGuard Quickstart.ipynb @@ -411,13 +411,13 @@ { "data": { "text/plain": [ - "['resample_600s_normalize_dfs_1d_xgb_classifier',\n", - " 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier',\n", - " 'resample_600s_unstack_double_144_lstm_timeseries_classifier',\n", + "['resample_600s_unstack_144_lstm_timeseries_classifier',\n", " 'resample_3600s_unstack_24_lstm_timeseries_classifier',\n", - " 'resample_3600s_unstack_double_24_lstm_timeseries_classifier',\n", " 'resample_600s_unstack_dfs_1d_xgb_classifier',\n", - " 'resample_600s_unstack_144_lstm_timeseries_classifier']" + " 'resample_600s_normalize_dfs_1d_xgb_classifier',\n", + " 'resample_3600s_unstack_double_24_lstm_timeseries_classifier',\n", + " 'resample_600s_unstack_double_144_lstm_timeseries_classifier',\n", + " 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier']" ] }, "execution_count": 10, @@ -446,9 +446,9 @@ { "data": { "text/plain": [ - "['resample_600s_normalize_dfs_1d_xgb_classifier',\n", - " 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier',\n", - " 'resample_600s_unstack_dfs_1d_xgb_classifier']" + "['resample_600s_unstack_dfs_1d_xgb_classifier',\n", + " 'resample_600s_normalize_dfs_1d_xgb_classifier',\n", + " 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier']" ] }, "execution_count": 11, @@ -476,9 +476,9 @@ { "data": { "text/plain": [ - "{'resample_600s_normalize_dfs_1d_xgb_classifier': '/app/greenguard/pipelines/resample_600s_normalize_dfs_1d_xgb_classifier.json',\n", - " 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier': '/app/greenguard/pipelines/resample_600s_unstack_normalize_dfs_1d_xgb_classifier.json',\n", - " 'resample_600s_unstack_dfs_1d_xgb_classifier': '/app/greenguard/pipelines/resample_600s_unstack_dfs_1d_xgb_classifier.json'}" + "{'resample_600s_unstack_dfs_1d_xgb_classifier': '/app/greenguard/pipelines/resample_600s_unstack_dfs_1d_xgb_classifier.json',\n", + " 'resample_600s_normalize_dfs_1d_xgb_classifier': '/app/greenguard/pipelines/resample_600s_normalize_dfs_1d_xgb_classifier.json',\n", + " 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier': '/app/greenguard/pipelines/resample_600s_unstack_normalize_dfs_1d_xgb_classifier.json'}" ] }, "execution_count": 12, @@ -495,15 +495,18 @@ "metadata": {}, "source": [ "For the rest of this tutorial, we will select and use the pipeline\n", - "`resample_600s_unstack_normalize_dfs_1d_xgb_classifier` as our template.\n", + "`resample_600s_unstack_normalize_dfs_1d_xgb_classifier` and `resample_600s_normalize_dfs_1d_xgb_classifier`.\n", "\n", - "This templates contains the following steps:\n", + "The `resample_600s_unstack_normalize_dfs_1d_xgb_classifier` template contains the following steps:\n", "\n", "- Resample the data using a 10 minute average aggregation\n", "- Unstack the data by signal, so each signal is in a different column\n", "- Normalize the Turbine IDs into a new table to assist DFS aggregations\n", "- Use DFS on the readings based on the target_times cutoff times using a 1d window size\n", - "- Apply an XGBoost Classifier" + "- Apply an XGBoost Classifier\n", + "\n", + "And the `resample_600s_normalize_dfs_1d_xgb_classifier` template contains the above steps but without\n", + "unstacking the data by signal." 
]
  },
  {
@@ -512,7 +515,10 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "template = 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier'"
+    "templates = [\n",
+    "    'resample_600s_unstack_normalize_dfs_1d_xgb_classifier', \n",
+    "    'resample_600s_normalize_dfs_1d_xgb_classifier'\n",
+    "]"
   ]
  },
  {
@@ -543,16 +549,21 @@
   "source": [
    "from greenguard.pipeline import GreenGuardPipeline\n",
    "\n",
-    "pipeline = GreenGuardPipeline(template, metric='f1', cv_splits=3)"
+    "pipeline = GreenGuardPipeline(templates, metric='f1', cv_splits=3)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "Once we have created the pipeline, we can call its `tune` method to find the best possible\n",
-    "hyperparameters for our data, passing the `target_times` and `readings` variables,\n",
-    "as well as an indication of the number of tuning iterations that we want to perform."
+    "Once we have created the pipeline, we can find which template and which combination of hyperparameters works best for our data by calling the `tune` method of our pipeline, passing the `target_times` and `readings` variables.\n",
+    "This method will return a `BTBSession` instance that will:\n",
+    "- Select and tune templates.\n",
+    "- If a template or a set of hyperparameters gets a higher score than the previous best, automatically update our pipeline so that it uses that template with those hyperparameters.\n",
+    "- Remove templates that don't work with the given data and focus on tuning only the ones that do.\n",
+    "\n",
+    "Also, if we specify `iterations`, the `tune` method will return a session that has already performed tuning\n",
+    "for the given number of iterations:"
   ]
  },
  {
@@ -564,8 +575,7 @@
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "INFO:greenguard.pipeline:Scoring the default pipeline\n",
-      "INFO:greenguard.pipeline:Running static steps before cross validation\n"
+      "INFO:btb.session:Obtaining default configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Built 165 features\n",
-      "Elapsed: 00:47 | Progress: 100%|██████████\n",
-      "Elapsed: 00:24 | Progress: 100%|██████████\n",
+      "Elapsed: 01:46 | Progress: 100%|██████████\n",
+      "Elapsed: 00:43 | Progress: 100%|██████████\n",
      "Built 165 features\n",
-      "Elapsed: 00:50 | Progress: 100%|██████████\n",
-      "Elapsed: 00:23 | Progress: 100%|██████████\n",
+      "Elapsed: 00:57 | Progress: 100%|██████████\n",
+      "Elapsed: 00:27 | Progress: 100%|██████████\n",
      "Built 165 features\n",
-      "Elapsed: 00:46 | Progress: 100%|██████████\n",
-      "Elapsed: 00:23 | Progress: 100%|██████████\n"
+      "Elapsed: 00:54 | Progress: 100%|██████████\n",
+      "Elapsed: 00:24 | Progress: 100%|██████████\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:btb.session:New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.605187908496732\n",
      "INFO:btb.session:Obtaining default configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Built 99 features\n",
      "Elapsed: 03:44 | Progress: 100%|██████████\n",
      "Elapsed: 01:11 | Progress: 100%|██████████\n",
      "Built 99 features\n",
      "Elapsed: 02:24 | Progress: 100%|██████████\n",
      "Elapsed: 01:10 | Progress: 100%|██████████\n",
      "Built 99 features\n",
      "Elapsed: 02:55 | Progress: 100%|██████████\n",
      "Elapsed: 02:15 | Progress: 100%|██████████\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "INFO:greenguard.pipeline:Default Pipeline score: 0.605187908496732\n",
-      "INFO:greenguard.pipeline:Scoring pipeline 1\n",
-      "INFO:btb:Using Uniform sampler as user specified r_minimum threshold is not met to start the GP based learning\n",
-      "INFO:greenguard.pipeline:Pipeline 1 score: 0.6188131761825791\n",
-      "INFO:greenguard.pipeline:Scoring pipeline 2\n",
-      "INFO:greenguard.pipeline:Pipeline 2 score: 0.6271095502877767\n",
-      "INFO:greenguard.pipeline:Scoring pipeline 3\n",
-      "INFO:greenguard.pipeline:Pipeline 3 score: 0.6305597783858653\n",
-      "INFO:greenguard.pipeline:Scoring pipeline 4\n",
-      "INFO:greenguard.pipeline:Pipeline 4 score: 0.6024864024864024\n",
-      "INFO:greenguard.pipeline:Scoring pipeline 5\n",
-      "INFO:greenguard.pipeline:Pipeline 5 score: 0.6141217155301661\n"
+      "INFO:btb.session:Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n",
+      "INFO:btb.session:New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.6074772975193733\n",
+      "INFO:btb.session:Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n",
+      "INFO:btb.session:Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n"
     ]
    }
   ],
   "source": [
-    "pipeline.tune(target_times, readings, iterations=5)"
+    "session = pipeline.tune(target_times, readings, iterations=5)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "After the tuning process has finished, the hyperparameters have been already set in the classifier.\n",
+    "After the tuning process has finished, the template and the hyperparameters \n",
+    "that have obtained the best score have already been set in the classifier.\n",
     "\n",
-    "We can see the found hyperparameters by calling the `get_hyperparameters` method,\n",
-    "which will return a dictionary with the best hyperparameters found so far:"
+    "We can see the `best_proposal` that contains the template name, hyperparameters\n",
+    "and score by accessing `session.best_proposal`:"
   ]
  },
  {
@@ -624,12 +650,16 @@
   {
    "data": {
     "text/plain": [
-      "{'mlprimitives.custom.feature_extraction.CategoricalEncoder#1': {'max_labels': 82},\n",
-      " 'xgboost.XGBClassifier#1': {'n_estimators': 785,\n",
-      "  'max_depth': 7,\n",
-      "  'learning_rate': 0.12220259756122442,\n",
-      "  'gamma': 0.07359343182340616,\n",
-      "  'min_child_weight': 9}}"
+      "{'id': 'c18f45d5e3bc2e41b3b3456b24d34add',\n",
+      " 'name': 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier',\n",
+      " 'config': {('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n",
+      "   'max_labels'): 82,\n",
+      "  ('xgboost.XGBClassifier#1', 'n_estimators'): 940,\n",
+      "  ('xgboost.XGBClassifier#1', 'max_depth'): 4,\n",
+      "  ('xgboost.XGBClassifier#1', 'learning_rate'): 0.5949116894971435,\n",
+      "  ('xgboost.XGBClassifier#1', 'gamma'): 0.14299079052852726,\n",
+      "  ('xgboost.XGBClassifier#1', 'min_child_weight'): 9},\n",
+      " 'score': 0.6074772975193733}"
     ]
    },
    "execution_count": 16,
    "metadata": {},
    "output_type": "execute_result"
   }
  ],
+   "source": [
+    "session.best_proposal"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "You can check that the new hyperparameters are already set by calling the `get_hyperparameters` method:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n",
+       "  'max_labels'): 82,\n",
+       "  ('xgboost.XGBClassifier#1', 'n_estimators'): 940,\n",
+       "  ('xgboost.XGBClassifier#1', 'max_depth'): 4,\n",
+       "  ('xgboost.XGBClassifier#1', 'learning_rate'): 0.5949116894971435,\n",
+       "  ('xgboost.XGBClassifier#1', 'gamma'): 0.14299079052852726,\n",
+       "  ('xgboost.XGBClassifier#1', 'min_child_weight'): 9}"
+      ]
+     },
+     "execution_count": 17,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
   "source": [
    "pipeline.get_hyperparameters()"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "We can check the template name that is used to generate the pipeline:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'resample_600s_unstack_normalize_dfs_1d_xgb_classifier'"
+      ]
+     },
+     "execution_count": 18,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "pipeline.template_name"
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {},
  },
  {
   "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
-       "0.6305597783858653"
+       "0.6074772975193733"
      ]
     },
-     "execution_count": 17,
+     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pipeline.cv_score"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
-    "**NOTE**: If the score is not good enough, we can call the `tune` method again as many times\n",
-    "as needed and the pipeline will continue its tuning process every time based on the previous\n",
-    "results!"
+    "**NOTE**: If the score is not good enough, we can call the `run` method of the `session` again,\n",
+    "specifying the number of iterations, and this will continue the tuning process from\n",
+    "the previous results!"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "INFO:greenguard.pipeline:Scoring pipeline 1\n",
-      "INFO:greenguard.pipeline:Pipeline 1 score: 0.6635006784260514\n",
-      "INFO:greenguard.pipeline:Scoring pipeline 2\n",
-      "INFO:greenguard.pipeline:Pipeline 2 score: 0.6845139382452815\n",
-      "INFO:greenguard.pipeline:Scoring pipeline 3\n",
-      "INFO:greenguard.pipeline:Pipeline 3 score: 0.6424425247954658\n",
-      "INFO:greenguard.pipeline:Scoring pipeline 4\n",
-      "INFO:greenguard.pipeline:Pipeline 4 score: 0.6146558553876801\n",
-      "INFO:greenguard.pipeline:Scoring pipeline 5\n",
-      "INFO:greenguard.pipeline:Pipeline 5 score: 0.6188226349516671\n",
-      "INFO:greenguard.pipeline:Scoring pipeline 6\n",
-      "INFO:greenguard.pipeline:Pipeline 6 score: 0.6213326748609891\n",
-      "INFO:greenguard.pipeline:Scoring pipeline 7\n",
-      "INFO:greenguard.pipeline:Pipeline 7 score: 0.6431577681577682\n",
-      "INFO:greenguard.pipeline:Scoring pipeline 8\n",
-      "INFO:greenguard.pipeline:Pipeline 8 score: 0.6119918008302174\n",
-      "INFO:greenguard.pipeline:Scoring pipeline 9\n",
-      "INFO:greenguard.pipeline:Pipeline 9 score: 0.670814479638009\n",
-      "INFO:greenguard.pipeline:Scoring pipeline 10\n",
-      "INFO:greenguard.pipeline:Pipeline 10 score: 0.6781385082782808\n"
+      "INFO:btb.session:Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n",
+      "INFO:btb.session:Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n",
+      "INFO:btb.session:New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.6215756372962148\n",
+      "INFO:btb.session:Generating new proposal configuration for 
resample_600s_normalize_dfs_1d_xgb_classifier\n", + "INFO:btb.session:Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", + "INFO:btb.session:New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.6228241559394411\n", + "INFO:btb.session:Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", + "INFO:btb.session:New optimal found: resample_600s_normalize_dfs_1d_xgb_classifier - 0.6310483870967741\n", + "INFO:btb.session:Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", + "INFO:btb.session:Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", + "INFO:btb.session:Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", + "INFO:btb.session:Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", + "INFO:btb.session:Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", + "INFO:btb.session:New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.6421858959172391\n" ] + }, + { + "data": { + "text/plain": [ + "{'id': '597e7123769b671e0f0c964311ebc005',\n", + " 'name': 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier',\n", + " 'config': {('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", + " 'max_labels'): 5,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 119,\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 10,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.8912106438743266,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.022878268134643553,\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1},\n", + " 'score': 0.6421858959172391}" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "pipeline.tune(target_times, readings, iterations=10)" + "session.run(iterations=10)" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.6845139382452815" + "0.6421858959172391" ] }, - "execution_count": 19, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -736,21 +839,22 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'mlprimitives.custom.feature_extraction.CategoricalEncoder#1': {'max_labels': 84},\n", - " 'xgboost.XGBClassifier#1': {'n_estimators': 788,\n", - " 'max_depth': 4,\n", - " 'learning_rate': 0.13866846579555614,\n", - " 'gamma': 0.652732260680545,\n", - " 'min_child_weight': 10}}" + "{('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", + " 'max_labels'): 5,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 119,\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 10,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.8912106438743266,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.022878268134643553,\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1}" ] }, - "execution_count": 20, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -774,7 +878,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -782,7 +886,7 @@ "output_type": "stream", "text": [ "Built 165 features\n", - "Elapsed: 00:52 | Progress: 100%|██████████\n" + "Elapsed: 02:08 | Progress: 
100%|██████████\n" ] } ], @@ -801,14 +905,14 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Elapsed: 00:17 | Progress: 100%|██████████\n" + "Elapsed: 00:24 | Progress: 100%|██████████\n" ] } ], @@ -825,7 +929,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -834,7 +938,7 @@ "0.76" ] }, - "execution_count": 23, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -863,7 +967,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ @@ -882,7 +986,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ @@ -898,14 +1002,14 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Elapsed: 00:17 | Progress: 100%|██████████\n" + "Elapsed: 00:22 | Progress: 100%|██████████\n" ] }, { @@ -914,7 +1018,7 @@ "array([0, 0, 0, 1, 0])" ] }, - "execution_count": 26, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -941,7 +1045,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.6.10" } }, "nbformat": 4, From bacc0eaaa587e7300de518ec45bb015f919ff7bd Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Fri, 5 Jun 2020 16:13:36 +0200 Subject: [PATCH 035/171] Fix CSV filename format. --- docs/advanced_usage/csv.md | 8 +-- greenguard/demo.py | 2 +- greenguard/loaders/csv.py | 2 +- notebooks/2. Extract Readings.ipynb | 76 ++++++++++++++--------------- 4 files changed, 44 insertions(+), 44 deletions(-) diff --git a/docs/advanced_usage/csv.md b/docs/advanced_usage/csv.md index c020832..c267807 100644 --- a/docs/advanced_usage/csv.md +++ b/docs/advanced_usage/csv.md @@ -26,10 +26,10 @@ following structure: * `readings/T001` * `readings/T002` * ... -* Inside each turbine folder one CSV file exists for each month, named `%Y-%m-.csv`. - * `readings/T001/2010-01-.csv` - * `readings/T001/2010-02-.csv` - * `readings/T001/2010-03-.csv` +* Inside each turbine folder one CSV file exists for each month, named `%Y-%m.csv`. + * `readings/T001/2010-01.csv` + * `readings/T001/2010-02.csv` + * `readings/T001/2010-03.csv` * ... * Each CSV file contains three columns: * `signal_id`: name or id of the signal. 
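For reference, one file in this layout can be loaded with plain pandas; a
minimal sketch, assuming the example `readings/T001/2010-01.csv` path from
the docs above exists and that the readings use the loader's default
`%m/%d/%y %H:%M:%S` timestamp format:

    import pandas as pd

    # One turbine, one month: signal_id, timestamp and value columns.
    readings = pd.read_csv('readings/T001/2010-01.csv')
    readings['timestamp'] = pd.to_datetime(
        readings['timestamp'], format='%m/%d/%y %H:%M:%S')

    # 600 second (10 minute) averages per signal, mirroring what the
    # loader produces with rule='600s' and aggregation='mean'.
    resampled = (
        readings.set_index('timestamp')
        .groupby('signal_id')['value']
        .resample('600s')
        .mean()
        .reset_index()
    )

The CSVLoader changed below does the same work across all the turbine
folders, and additionally filters the readings down to the windows needed
by the target_times.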
diff --git a/greenguard/demo.py b/greenguard/demo.py index e15f71d..429e0e9 100644 --- a/greenguard/demo.py +++ b/greenguard/demo.py @@ -62,7 +62,7 @@ def generate_raw_readings(output_path='demo'): for month in range(1, 13): month_data = data[data.timestamp.dt.month == month].copy() month_data['timestamp'] = month_data['timestamp'].dt.strftime('%m/%d/%y %H:%M:%S') - month_path = os.path.join(turbine_path, '2013-{:02d}-.csv'.format(month)) + month_path = os.path.join(turbine_path, '2013-{:02d}.csv'.format(month)) LOGGER.info('Generating file %s', month_path) month_data[['signal_id', 'timestamp', 'value']].to_csv(month_path, index=False) diff --git a/greenguard/loaders/csv.py b/greenguard/loaders/csv.py index 6e3729a..97d33ee 100644 --- a/greenguard/loaders/csv.py +++ b/greenguard/loaders/csv.py @@ -37,7 +37,7 @@ class CSVLoader: """ DEFAULT_DATETIME_FMT = '%m/%d/%y %H:%M:%S' - DEFAULT_FILENAME_FMT = '%Y-%m-.csv' + DEFAULT_FILENAME_FMT = '%Y-%m.csv' def __init__(self, readings_path='.', rule=None, aggregation='mean', unstack=False, datetime_fmt=DEFAULT_DATETIME_FMT, filename_fmt=DEFAULT_FILENAME_FMT): diff --git a/notebooks/2. Extract Readings.ipynb b/notebooks/2. Extract Readings.ipynb index f8166a0..d306172 100644 --- a/notebooks/2. Extract Readings.ipynb +++ b/notebooks/2. Extract Readings.ipynb @@ -70,18 +70,18 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:greenguard.demo:Generating file readings/T001/2013-01-.csv\n", - "INFO:greenguard.demo:Generating file readings/T001/2013-02-.csv\n", - "INFO:greenguard.demo:Generating file readings/T001/2013-03-.csv\n", - "INFO:greenguard.demo:Generating file readings/T001/2013-04-.csv\n", - "INFO:greenguard.demo:Generating file readings/T001/2013-05-.csv\n", - "INFO:greenguard.demo:Generating file readings/T001/2013-06-.csv\n", - "INFO:greenguard.demo:Generating file readings/T001/2013-07-.csv\n", - "INFO:greenguard.demo:Generating file readings/T001/2013-08-.csv\n", - "INFO:greenguard.demo:Generating file readings/T001/2013-09-.csv\n", - "INFO:greenguard.demo:Generating file readings/T001/2013-10-.csv\n", - "INFO:greenguard.demo:Generating file readings/T001/2013-11-.csv\n", - "INFO:greenguard.demo:Generating file readings/T001/2013-12-.csv\n" + "INFO:greenguard.demo:Generating file readings/T001/2013-01.csv\n", + "INFO:greenguard.demo:Generating file readings/T001/2013-02.csv\n", + "INFO:greenguard.demo:Generating file readings/T001/2013-03.csv\n", + "INFO:greenguard.demo:Generating file readings/T001/2013-04.csv\n", + "INFO:greenguard.demo:Generating file readings/T001/2013-05.csv\n", + "INFO:greenguard.demo:Generating file readings/T001/2013-06.csv\n", + "INFO:greenguard.demo:Generating file readings/T001/2013-07.csv\n", + "INFO:greenguard.demo:Generating file readings/T001/2013-08.csv\n", + "INFO:greenguard.demo:Generating file readings/T001/2013-09.csv\n", + "INFO:greenguard.demo:Generating file readings/T001/2013-10.csv\n", + "INFO:greenguard.demo:Generating file readings/T001/2013-11.csv\n", + "INFO:greenguard.demo:Generating file readings/T001/2013-12.csv\n" ] } ], @@ -108,7 +108,7 @@ "source": [ "import pandas as pd\n", "\n", - "readings_sample = pd.read_csv('readings/T001/2013-01-.csv')" + "readings_sample = pd.read_csv('readings/T001/2013-01.csv')" ] }, { @@ -206,10 +206,10 @@ " * `readings/T001`\n", " * `readings/T002`\n", " * ...\n", - "* Inside each turbine folder one CSV file exists for each month, named `%Y-%m-.csv`.\n", - " * `readings/T001/2010-01-.csv`\n", - " * `readings/T001/2010-02-.csv`\n", - " * 
`readings/T001/2010-03-.csv`\n", + "* Inside each turbine folder one CSV file exists for each month, named `%Y-%m.csv`.\n", + " * `readings/T001/2010-01.csv`\n", + " * `readings/T001/2010-02.csv`\n", + " * `readings/T001/2010-03.csv`\n", " * ...\n", "* Each CSV file contains three columns:\n", " * `signal_id`: name or id of the signal.\n", @@ -763,30 +763,30 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", "INFO:greenguard.loaders.csv:81749 readings reduced to 3432\n", "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:110938 readings reduced to 4680\n", + "INFO:greenguard.loaders.csv:112118 readings reduced to 4680\n", "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:103319 readings reduced to 4368\n", "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:111862 readings reduced to 4680\n", "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:114400 readings reduced to 4836\n", + "INFO:greenguard.loaders.csv:105321 readings reduced to 4550\n", + "INFO:greenguard.loaders.csv:108371 readings reduced to 4680\n", + "INFO:greenguard.loaders.csv:115615 readings reduced to 4836\n", "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:115647 readings reduced to 4836\n", "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:110938 readings reduced to 4680\n", - "INFO:greenguard.loaders.csv:115647 readings reduced to 4836\n", + "INFO:greenguard.loaders.csv:103319 readings reduced to 4368\n", "INFO:greenguard.loaders.csv:115979 readings reduced to 4836\n", - "INFO:greenguard.loaders.csv:111862 readings reduced to 4680\n", + "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", "INFO:greenguard.loaders.csv:114477 readings reduced to 4836\n", - "INFO:greenguard.loaders.csv:105321 readings reduced to 4550\n", - "INFO:greenguard.loaders.csv:115615 readings reduced to 4836\n", - "INFO:greenguard.loaders.csv:114400 readings reduced to 4836\n", - "INFO:greenguard.loaders.csv:108371 readings reduced to 4680\n", - "INFO:greenguard.loaders.csv:112118 readings reduced to 4680\n", "INFO:greenguard.loaders.csv:Loaded 55250 readings from turbine T001\n", "INFO:greenguard.loaders.csv:Loaded 55250 turbine readings\n", "INFO:greenguard.targets:Dropped 12 targets without enough data. 
Final target_times size: 341\n" @@ -961,28 +961,28 @@ "text": [ "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:108371 readings reduced to 4680\n", "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:81749 readings reduced to 3432\n", "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:103319 readings reduced to 4368\n", "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:110938 readings reduced to 4680\n", + "INFO:greenguard.loaders.csv:115647 readings reduced to 4836\n", "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:103319 readings reduced to 4368\n", + "INFO:greenguard.loaders.csv:115615 readings reduced to 4836\n", "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:114400 readings reduced to 4836\n", + "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:114477 readings reduced to 4836\n", "INFO:greenguard.loaders.csv:115979 readings reduced to 4836\n", - "INFO:greenguard.loaders.csv:108371 readings reduced to 4680\n", + "INFO:greenguard.loaders.csv:111862 readings reduced to 4680\n", "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", + "INFO:greenguard.loaders.csv:81749 readings reduced to 3432\n", "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", "INFO:greenguard.loaders.csv:105321 readings reduced to 4550\n", "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:115615 readings reduced to 4836\n", - "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:114400 readings reduced to 4836\n", - "INFO:greenguard.loaders.csv:115647 readings reduced to 4836\n", "INFO:greenguard.loaders.csv:112118 readings reduced to 4680\n", - "INFO:greenguard.loaders.csv:111862 readings reduced to 4680\n", - "INFO:greenguard.loaders.csv:114477 readings reduced to 4836\n", + "INFO:greenguard.loaders.csv:110938 readings reduced to 4680\n", "INFO:greenguard.loaders.csv:Loaded 2125 readings from turbine T001\n", "INFO:greenguard.loaders.csv:Loaded 2125 turbine readings\n", "INFO:greenguard.targets:Dropped 12 targets without enough data. Final target_times size: 341\n" @@ -1366,7 +1366,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.8" + "version": "3.6.10" } }, "nbformat": 4, From bf6e56fab343d3dcebb83d3882b3f2e3016dbc14 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Fri, 5 Jun 2020 18:37:26 +0200 Subject: [PATCH 036/171] Update links. --- README.md | 4 ++-- notebooks/2. Extract Readings.ipynb | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index bef0983..7bb64d4 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,7 @@ pip install greenguard This will pull and install the latest stable release from [PyPi](https://pypi.org/). If you want to install from source or contribute to the project please read the -[Contributing Guide](https://d3-ai.github.io/GreenGuard/contributing.html#get-started). +[Contributing Guide](https://signals-dev.github.io/GreenGuard/contributing.html#get-started). 
## Docker usage @@ -153,7 +153,7 @@ A part from the in-memory data format explained above, which is limited by the m allocation capabilities of the system where it is run, **GreenGuard** is also prepared to load and work with data stored as a collection of CSV files, drastically increasing the amount of data which it can work with. Further details about this format can be found in the -[project documentation site](https://d3-ai.github.io/GreenGuard/advanced_usage/csv.html). +[project documentation site](https://signals-dev.github.io/GreenGuard/advanced_usage/csv.html). # Quickstart diff --git a/notebooks/2. Extract Readings.ipynb b/notebooks/2. Extract Readings.ipynb index d306172..db55927 100644 --- a/notebooks/2. Extract Readings.ipynb +++ b/notebooks/2. Extract Readings.ipynb @@ -10,7 +10,7 @@ "that contains readings in the raw CSV format.\n", "\n", "The Raw CSV format es briefly explained below, but more details can be found in [the documentation site](\n", - "https://d3-ai.github.io/GreenGuard/advanced_usage/csv.html)/n", + "https://signals-dev.github.io/GreenGuard/advanced_usage/csv.html)/n", "\n", "In this notebook we will:\n", "\n", From b6ca7c22a3a93f285916cf0f9e822555ef86f571 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Mon, 8 Jun 2020 18:09:57 +0200 Subject: [PATCH 037/171] PR review and comments. --- greenguard/pipeline.py | 194 ++++++++------------ notebooks/1. GreenGuard Quickstart.ipynb | 224 ++++++++++++++--------- 2 files changed, 213 insertions(+), 205 deletions(-) diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py index f81ae99..228c68c 100644 --- a/greenguard/pipeline.py +++ b/greenguard/pipeline.py @@ -158,8 +158,8 @@ def _get_init_params(self, template_name): if self._init_params is None: return {} - elif any(name in self._init_params for name in list(self.template_names.keys())): - return self._init_params.get(template_name) + elif any(name in self._init_params for name in list(self.templates.keys())): + return self._init_params.get(template_name) or {} return self._init_params @@ -167,8 +167,8 @@ def _set_hyperparameters(self, new_hyperparameters): self._hyperparameters = deepcopy(new_hyperparameters) def _set_template(self, template_name): - self.template_name = deepcopy(template_name) - self.template = self.templates[self.template_name] + self.template_name = template_name + self.template = self._template_dicts[self.template_name] def _get_preprocessing(self, template_name): if isinstance(self._preprocessing, int): @@ -189,25 +189,8 @@ def _update_params(old, new): for param, value in params.items(): block_params[param] = value - def _build_pipeline(self): - template_params = self.template.setdefault('init_params', dict()) - for name, params in list(template_params.items()): - if '#' not in name: - template_params[name + '#1'] = template_params.pop(name) - - init_params = self._get_init_params(self.template_name) - if init_params: - self._update_params(template_params, init_params) - - self._pipeline = MLPipeline(self.template) - - if self._hyperparameters: - self._pipeline.set_hyperparameters(self._hyperparameters) - - self.fitted = False - - @staticmethod - def _count_static_steps(pipeline): + def _count_static_steps(self, template_name): + pipeline = MLPipeline(self._template_dicts.get(template_name)) tunable_hyperparams = pipeline.get_tunable_hyperparameters() for index, block_name in enumerate(pipeline.blocks.keys()): if tunable_hyperparams[block_name]: @@ -215,14 +198,9 @@ def _count_static_steps(pipeline): return 
0 - @staticmethod - def _get_templates(template): - if not isinstance(template, list): - templates = [template] - else: - templates = template - - templates_dict = dict() + def _get_templates(self, templates): + template_dicts = dict() + template_names = list() for template in templates: if isinstance(template, str): template_name = template @@ -230,59 +208,70 @@ def _get_templates(template): else: template_name = md5(json.dumps(template)).digest() - templates_dict[template_name] = template + init_params = self._init_params.get(template_name, self._default_init_params) + self._update_params(template['init_params'], init_params) + template_dicts[template_name] = template + template_names.append(template_name) - return templates_dict + return template_names, template_dicts - def __init__(self, template, metric='accuracy', cost=False, init_params=None, stratify=True, - cv_splits=5, shuffle=True, random_state=0, preprocessing=0): + def _generate_init_params(self, init_params): + if not init_params: + self._init_params = {} + elif isinstance(init_params, list): + self._init_params = dict(zip(self._template_names, init_params)) + elif any(name in init_params for name in self._template_names): + self._init_params = init_params - self._cv = self._get_cv(stratify, cv_splits, shuffle, random_state) + def _generate_preprocessing(self, preprocessing): + if isinstance(preprocessing, int): + self._preprocessing = {name: preprocessing for name in self._template_names} + else: + if isinstance(preprocessing, list): + preprocessing = dict(zip(self._temlpate_names, preprocessing)) + + self._preprocessing = { + name: preprocessing.get(name, 0) + for name in self._template_names + } + + def _build_pipeline(self): + self._pipeline = MLPipeline(self.template) + + if self._hyperparameters: + self._pipeline.set_hyperparameters(self._hyperparameters) + + self.fitted = False + + def __init__(self, templates, metric='accuracy', cost=False, init_params=None, stratify=True, + cv_splits=5, shuffle=True, random_state=0, preprocessing=0): if isinstance(metric, str): metric, cost = METRICS[metric] self._metric = metric self._cost = cost - self._init_params = init_params - self._preprocessing = preprocessing - + self._cv = self._get_cv(stratify, cv_splits, shuffle, random_state) self.cv_score = np.inf if cost else -np.inf - self.templates = self._get_templates(template) - self._set_template(list(self.templates.keys())[0]) - - self._hyperparameters = dict() - self._build_pipeline() - - _static = self._count_static_steps(self._pipeline) - _preprocessing = self._get_preprocessing(self.template_name) + if not isinstance(templates, list): + templates = [templates] - if _preprocessing and (_preprocessing > _static): - raise ValueError('Preprocessing cannot be bigger than static') + self._default_init_params = {} + self._generate_init_params(init_params) - self.iterations = 0 + self.templates = templates + self._template_names, self._template_dicts = self._get_templates(templates) - def __repr__(self): - steps = self._pipeline.primitives.copy() - preprocessing = self._get_preprocessing(self.template_name) - static = self._count_static_steps() + self._generate_preprocessing(preprocessing) + self._static = { + name: self._count_static_steps(name) + for name in self._template_names + } - preprocessing_steps = steps[:preprocessing] - static_steps = steps[preprocessing:static] - tunable_steps = steps[static:] - - return ( - "GreenGuardPipeline({})\n" - " preprocessing:\n{}\n" - " static:\n{}\n" - " tunable:\n{}\n" - ).format( - 
self.template_name, - '\n'.join(' {}'.format(step) for step in preprocessing_steps), - '\n'.join(' {}'.format(step) for step in static_steps), - '\n'.join(' {}'.format(step) for step in tunable_steps), - ) + self._set_template(self._template_names[0]) + self._hyperparameters = dict() + self._build_pipeline() def get_hyperparameters(self): """Get the current hyperparameters. @@ -300,15 +289,14 @@ def _is_better(self, score): return score > self.cv_score def _generate_splits(self, template_name, target_times, readings, turbines=None): - template = self.templates.get(template_name) + template = self._template_dicts.get(template_name) + pipeline = MLPipeline(template) + preprocessing = self._preprocessing.get(template_name) + static = self._static.get(template_name) X = target_times[['turbine_id', 'cutoff_time']] y = target_times['target'] - pipeline = MLPipeline(template) - preprocessing = self._get_preprocessing(template_name) - static = self._count_static_steps(pipeline) - if preprocessing: if preprocessing > static: @@ -354,16 +342,13 @@ def _cross_validate(self, template_splits, hyperparams): LOGGER.debug('Fold fold %s score: %s', fold, score) scores.append(score) - cv_score = np.mean(scores) - - return cv_score + return np.mean(scores) def _make_btb_scorer(self, target_times, readings, turbines): splits = {} def scorer(template_name, config): - template_splits = splits.get(template_name) if template_splits is None: template_splits = self._generate_splits( @@ -372,11 +357,16 @@ def scorer(template_name, config): splits[template_name] = template_splits cv_score = self._cross_validate(template_splits, config) - if self._is_better(cv_score): + _config = '\n'.join(' {}: {}'.format(n, v) for n, v in config.items()) + LOGGER.info(('New configuration found:\n' + ' Template: %s \n' + ' Hyperparameters: \n' + '%s'), template_name, _config) + self.cv_score = cv_score self._set_template(template_name) - self._set_hyperparameters(deepcopy(config)) + self._set_hyperparameters(config) self._build_pipeline() return cv_score @@ -416,7 +406,6 @@ def cross_validate(self, target_times, readings, turbines, Computed cross validation score. This score is the average of the scores obtained accross all the cross validation folds. 
""" - if not template_name: template_name = self.template_name if hyperparams is None: @@ -428,37 +417,18 @@ def cross_validate(self, target_times, readings, turbines, template_splits = self._generate_splits(template_name, target_times, readings, turbines) return self._cross_validate(template_splits, hyperparams) - @staticmethod - def _parse_params(param_details): - param_type = param_details['type'] - param_details['type'] = 'str' if param_type == 'string' else param_type - - if param_details['type'] == 'bool': - param_details['range'] = [True, False] - else: - param_details['range'] = param_details.get('range') or param_details.get('values') - - if 'default' not in param_details: - param_details['default'] = param_details['range'][0] - - return param_details - @classmethod - def _get_tunables(cls, templates): - pipelines = {name: MLPipeline(template) for name, template in templates.items()} + def _get_tunables(cls, template_dicts): tunables = {} - - for pipeline_name, pipeline in pipelines.items(): - pipeline_tunables = {} - for name, param_details in pipeline.get_tunable_hyperparameters(flat=True).items(): - pipeline_tunables[name] = cls._parse_params(param_details) - - tunables[pipeline_name] = Tunable.from_dict(pipeline_tunables) + for name, template in template_dicts.items(): + pipeline = MLPipeline(template) + pipeline_tunables = pipeline.get_tunable_hyperparameters(flat=True) + tunables[name] = Tunable.from_dict(pipeline_tunables) return tunables - def tune(self, target_times, readings, turbines=None, iterations=10): - """Tune this pipeline for the indicated number of iterations. + def tune(self, target_times, readings, turbines=None): + """Create a tuning session object that tunes and selects the templates. Args: target_times (pandas.DataFrame): @@ -471,16 +441,10 @@ def tune(self, target_times, readings, turbines=None, iterations=10): turbines (pandas.DataFrame): ``turbines`` table. Only needed if the splits have not been previously computed. - iterations (int): - Number of iterations to perform. """ scoring_function = self._make_btb_scorer(target_times, readings, turbines) - tunables = self._get_tunables(self.templates) - session = BTBSession(tunables, scoring_function, maximize=not self._cost) - if iterations: - session.run(iterations) - - return session + tunables = self._get_tunables(self._template_dicts) + return BTBSession(tunables, scoring_function, maximize=not self._cost) def fit(self, target_times, readings, turbines=None): """Fit this pipeline to the given data. diff --git a/notebooks/1. GreenGuard Quickstart.ipynb b/notebooks/1. GreenGuard Quickstart.ipynb index be4b2f6..5415d8a 100644 --- a/notebooks/1. GreenGuard Quickstart.ipynb +++ b/notebooks/1. GreenGuard Quickstart.ipynb @@ -560,16 +560,30 @@ "This method will return a `BTBSession` session that will:\n", "- Select and tune templates.\n", "- If a template or hyperparameters that get a higher score than the previous one is found, automatically update our pipeline so that it uses that template with those hyperparameters.\n", - "- Remove templates that don't work with the given data and focus on tuning only the ones that do.\n", - "\n", - "Also, if we specify `iterations` the `tune` method will return a session that has already perfromed tuning\n", - "for the given amount of iterations:" + "- Remove templates that don't work with the given data and focus on tuning only the ones that do." 
] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, + "outputs": [], + "source": [ + "session = pipeline.tune(target_times, readings)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once we have our `session` we can call it's method `run` with the amount of\n", + "tuning iterations that we want to perform:" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, "outputs": [ { "name": "stderr", @@ -583,21 +597,30 @@ "output_type": "stream", "text": [ "Built 165 features\n", - "Elapsed: 01:46 | Progress: 100%|██████████\n", - "Elapsed: 00:43 | Progress: 100%|██████████\n", + "Elapsed: 01:40 | Progress: 100%|██████████\n", + "Elapsed: 00:53 | Progress: 100%|██████████\n", "Built 165 features\n", - "Elapsed: 00:57 | Progress: 100%|██████████\n", - "Elapsed: 00:27 | Progress: 100%|██████████\n", + "Elapsed: 01:37 | Progress: 100%|██████████\n", + "Elapsed: 00:45 | Progress: 100%|██████████\n", "Built 165 features\n", - "Elapsed: 00:54 | Progress: 100%|██████████\n", - "Elapsed: 00:24 | Progress: 100%|██████████\n" + "Elapsed: 01:52 | Progress: 100%|██████████\n", + "Elapsed: 00:58 | Progress: 100%|██████████\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "INFO:btb.session:New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.605187908496732\n", + "INFO:greenguard.pipeline:New configuration found:\n", + " Template: resample_600s_unstack_normalize_dfs_1d_xgb_classifier \n", + " Hyperparameters: \n", + " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 0\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 100\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 3\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.1\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.0\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1\n", + "INFO:btb.session:New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.6013257575757575\n", "INFO:btb.session:Obtaining default configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n" ] }, @@ -606,14 +629,14 @@ "output_type": "stream", "text": [ "Built 99 features\n", - "Elapsed: 03:44 | Progress: 100%|██████████\n", - "Elapsed: 01:11 | Progress: 100%|██████████\n", + "Elapsed: 05:21 | Progress: 100%|██████████\n", + "Elapsed: 02:31 | Progress: 100%|██████████\n", "Built 99 features\n", - "Elapsed: 02:24 | Progress: 100%|██████████\n", - "Elapsed: 01:10 | Progress: 100%|██████████\n", + "Elapsed: 02:37 | Progress: 100%|██████████\n", + "Elapsed: 01:14 | Progress: 100%|██████████\n", "Built 99 features\n", - "Elapsed: 02:55 | Progress: 100%|██████████\n", - "Elapsed: 02:15 | Progress: 100%|██████████\n" + "Elapsed: 02:17 | Progress: 100%|██████████\n", + "Elapsed: 01:10 | Progress: 100%|██████████\n" ] }, { @@ -621,48 +644,73 @@ "output_type": "stream", "text": [ "INFO:btb.session:Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", - "INFO:btb.session:New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.6074772975193733\n", + "INFO:greenguard.pipeline:New configuration found:\n", + " Template: resample_600s_unstack_normalize_dfs_1d_xgb_classifier \n", + " Hyperparameters: \n", + " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 76\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 663\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 4\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 
0.6981330874338336\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.42260412740973985\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10\n", + "INFO:btb.session:New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.6636363636363637\n", "INFO:btb.session:Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", "INFO:btb.session:Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n" ] + }, + { + "data": { + "text/plain": [ + "{'id': '73d39187c5c6ecfac03b05d407bf709d',\n", + " 'name': 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier',\n", + " 'config': {('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", + " 'max_labels'): 76,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 663,\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 4,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.6981330874338336,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.42260412740973985,\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10},\n", + " 'score': 0.6636363636363637}" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "session = pipeline.tune(target_times, readings, iterations=5)" + "session.run(5)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "After the tuning process has finished, the template and the hyperparameters \n", - "that have obtained the best score have been already set in the classifier.\n", - "\n", - "We can see the `best_proposal` that contains the tempalte name, hyperparameters\n", - "and score by accessing the `session.best_proposal`:" + "When this is done, the `best_proposal` will be printed out. We can access it anytime\n", + "using `session.best_proposal`:" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'id': 'c18f45d5e3bc2e41b3b3456b24d34add',\n", + "{'id': '73d39187c5c6ecfac03b05d407bf709d',\n", " 'name': 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier',\n", " 'config': {('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 82,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 940,\n", + " 'max_labels'): 76,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 663,\n", " ('xgboost.XGBClassifier#1', 'max_depth'): 4,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.5949116894971435,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.14299079052852726,\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 9},\n", - " 'score': 0.6074772975193733}" + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.6981330874338336,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.42260412740973985,\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10},\n", + " 'score': 0.6636363636363637}" ] }, - "execution_count": 16, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -680,22 +728,22 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 82,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 940,\n", + " 'max_labels'): 76,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 663,\n", " ('xgboost.XGBClassifier#1', 'max_depth'): 4,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.5949116894971435,\n", - " ('xgboost.XGBClassifier#1', 
'gamma'): 0.14299079052852726,\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 9}" + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.6981330874338336,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.42260412740973985,\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10}" ] }, - "execution_count": 17, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -713,7 +761,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -722,7 +770,7 @@ "'resample_600s_unstack_normalize_dfs_1d_xgb_classifier'" ] }, - "execution_count": 18, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -741,16 +789,16 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.6074772975193733" + "0.6636363636363637" ] }, - "execution_count": 19, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -770,7 +818,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -779,36 +827,42 @@ "text": [ "INFO:btb.session:Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", "INFO:btb.session:Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", - "INFO:btb.session:New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.6215756372962148\n", "INFO:btb.session:Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", "INFO:btb.session:Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", - "INFO:btb.session:New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.6228241559394411\n", "INFO:btb.session:Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", - "INFO:btb.session:New optimal found: resample_600s_normalize_dfs_1d_xgb_classifier - 0.6310483870967741\n", "INFO:btb.session:Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", "INFO:btb.session:Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", "INFO:btb.session:Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", "INFO:btb.session:Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", "INFO:btb.session:Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", - "INFO:btb.session:New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.6421858959172391\n" + "INFO:greenguard.pipeline:New configuration found:\n", + " Template: resample_600s_unstack_normalize_dfs_1d_xgb_classifier \n", + " Hyperparameters: \n", + " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 17\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 880\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 3\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.3214711402471415\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.9330408960929772\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10\n", + "INFO:btb.session:New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.6807110281923715\n" ] }, { "data": { "text/plain": [ - "{'id': '597e7123769b671e0f0c964311ebc005',\n", + "{'id': 
'f47187d007ea31262e087264580716c9',\n", " 'name': 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier',\n", " 'config': {('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 5,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 119,\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 10,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.8912106438743266,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.022878268134643553,\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1},\n", - " 'score': 0.6421858959172391}" + " 'max_labels'): 17,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 880,\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 3,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.3214711402471415,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.9330408960929772,\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10},\n", + " 'score': 0.6807110281923715}" ] }, - "execution_count": 20, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -819,16 +873,16 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.6421858959172391" + "0.6807110281923715" ] }, - "execution_count": 21, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -839,22 +893,22 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 5,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 119,\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 10,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.8912106438743266,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.022878268134643553,\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1}" + " 'max_labels'): 17,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 880,\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 3,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.3214711402471415,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.9330408960929772,\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10}" ] }, - "execution_count": 22, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -878,7 +932,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 24, "metadata": {}, "outputs": [ { @@ -886,7 +940,7 @@ "output_type": "stream", "text": [ "Built 165 features\n", - "Elapsed: 02:08 | Progress: 100%|██████████\n" + "Elapsed: 00:55 | Progress: 100%|██████████\n" ] } ], @@ -905,14 +959,14 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Elapsed: 00:24 | Progress: 100%|██████████\n" + "Elapsed: 00:17 | Progress: 100%|██████████\n" ] } ], @@ -929,7 +983,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -938,7 +992,7 @@ "0.76" ] }, - "execution_count": 25, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -967,7 +1021,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ @@ -986,7 +1040,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -1002,25 +1056,15 @@ }, { "cell_type": "code", - 
"execution_count": 28, + "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Elapsed: 00:22 | Progress: 100%|██████████\n" + "Elapsed: 00:01 | Progress: 11%|█ " ] - }, - { - "data": { - "text/plain": [ - "array([0, 0, 0, 1, 0])" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ From fc06af57fde8dbe69527406aea9dabfdd2d59feb Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Mon, 8 Jun 2020 19:58:42 +0200 Subject: [PATCH 038/171] Fix init params --- greenguard/pipeline.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py index 228c68c..a8e55d0 100644 --- a/greenguard/pipeline.py +++ b/greenguard/pipeline.py @@ -207,9 +207,6 @@ def _get_templates(self, templates): template = load_pipeline(template_name) else: template_name = md5(json.dumps(template)).digest() - - init_params = self._init_params.get(template_name, self._default_init_params) - self._update_params(template['init_params'], init_params) template_dicts[template_name] = template template_names.append(template_name) @@ -228,7 +225,7 @@ def _generate_preprocessing(self, preprocessing): self._preprocessing = {name: preprocessing for name in self._template_names} else: if isinstance(preprocessing, list): - preprocessing = dict(zip(self._temlpate_names, preprocessing)) + preprocessing = dict(zip(self._template_names, preprocessing)) self._preprocessing = { name: preprocessing.get(name, 0) @@ -257,11 +254,14 @@ def __init__(self, templates, metric='accuracy', cost=False, init_params=None, s if not isinstance(templates, list): templates = [templates] + self.templates = templates + self._template_names, self._template_dicts = self._get_templates(templates) self._default_init_params = {} self._generate_init_params(init_params) - self.templates = templates - self._template_names, self._template_dicts = self._get_templates(templates) + for name, template in self._template_dicts.items(): + init_params = self._init_params.get(name, self._default_init_params) + self._update_params(template['init_params'], init_params) self._generate_preprocessing(preprocessing) self._static = { From 5fd72b27454f038aba3fbddfa74a3a41d290e171 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Mon, 8 Jun 2020 20:21:42 +0200 Subject: [PATCH 039/171] Lint and code improvement. 
---
 greenguard/pipeline.py | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py
index a8e55d0..852343b 100644
--- a/greenguard/pipeline.py
+++ b/greenguard/pipeline.py
@@ -142,10 +142,6 @@ class GreenGuardPipeline(object):
     _init_params = None
     _preprocessing = None
 
-    @staticmethod
-    def _clone_pipeline(pipeline):
-        return MLPipeline.from_dict(pipeline.to_dict())
-
     def _get_cv(self, stratify, cv_splits, shuffle, random_state):
         if stratify:
             cv_class = StratifiedKFold
@@ -298,7 +294,6 @@ def _generate_splits(self, template_name, target_times, readings, turbines=None)
         y = target_times['target']
 
         if preprocessing:
-
             if preprocessing > static:
                 raise ValueError('Preprocessing cannot be bigger than static')
 
@@ -345,7 +340,6 @@ def _cross_validate(self, template_splits, hyperparams):
         return np.mean(scores)
 
     def _make_btb_scorer(self, target_times, readings, turbines):
-
         splits = {}
 
         def scorer(template_name, config):

From e132aac3f9321ea462eeddb05c5339473e0ef3db Mon Sep 17 00:00:00 2001
From: Plamen Valentinov Kolev
Date: Mon, 8 Jun 2020 20:40:38 +0200
Subject: [PATCH 040/171] Fix test.

---
 greenguard/pipeline.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py
index 852343b..efcfa44 100644
--- a/greenguard/pipeline.py
+++ b/greenguard/pipeline.py
@@ -257,7 +257,8 @@ def __init__(self, templates, metric='accuracy', cost=False, init_params=None, s
 
         for name, template in self._template_dicts.items():
             init_params = self._init_params.get(name, self._default_init_params)
-            self._update_params(template['init_params'], init_params)
+            template_params = template.setdefault('init_params', {})
+            self._update_params(template_params, init_params)
 
From a0040d43623051341d83d86b8faa8e5b8a17cd87 Mon Sep 17 00:00:00 2001
From: Plamen Valentinov Kolev
Date: Tue, 9 Jun 2020 14:08:51 +0200
Subject: [PATCH 041/171] Remove unnecessary code

---
 greenguard/pipeline.py | 29 ++---------------------------
 1 file changed, 2 insertions(+), 27 deletions(-)

diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py
index efcfa44..8bcb218 100644
--- a/greenguard/pipeline.py
+++ b/greenguard/pipeline.py
@@ -150,15 +150,6 @@ def _get_cv(self, stratify, cv_splits, shuffle, random_state):
 
         return cv_class(n_splits=cv_splits, shuffle=shuffle, random_state=random_state)
 
-    def _get_init_params(self, template_name):
-        if self._init_params is None:
-            return {}
-
-        elif any(name in self._init_params for name in list(self.templates.keys())):
-            return self._init_params.get(template_name) or {}
-
-        return self._init_params
-
     def _set_hyperparameters(self, new_hyperparameters):
         self._hyperparameters = deepcopy(new_hyperparameters)
 
@@ -166,15 +157,6 @@ def _set_template(self, template_name):
         self.template_name = template_name
         self.template = self._template_dicts[self.template_name]
 
-    def _get_preprocessing(self, template_name):
-        if isinstance(self._preprocessing, int):
-            return self._preprocessing
-
-        if isinstance(self._preprocessing, dict):
-            return self._preprocessing.get(template_name) or 0
-
-        return 0  # by default
-
     @staticmethod
     def _update_params(old, new):
         for name, params in new.items():
@@ -185,8 +167,7 @@ def _update_params(old, new):
             for param, value in params.items():
                 block_params[param] = value
 
-    def _count_static_steps(self, template_name):
-        pipeline = MLPipeline(self._template_dicts.get(template_name))
+    def _count_static_steps(self, pipeline):
tunable_hyperparams = pipeline.get_tunable_hyperparameters() for index, block_name in enumerate(pipeline.blocks.keys()): if tunable_hyperparams[block_name]: @@ -261,11 +242,6 @@ def __init__(self, templates, metric='accuracy', cost=False, init_params=None, s self._update_params(template_params, init_params) self._generate_preprocessing(preprocessing) - self._static = { - name: self._count_static_steps(name) - for name in self._template_names - } - self._set_template(self._template_names[0]) self._hyperparameters = dict() self._build_pipeline() @@ -289,8 +265,7 @@ def _generate_splits(self, template_name, target_times, readings, turbines=None) template = self._template_dicts.get(template_name) pipeline = MLPipeline(template) preprocessing = self._preprocessing.get(template_name) - static = self._static.get(template_name) - + static = self._count_static_steps(pipeline) X = target_times[['turbine_id', 'cutoff_time']] y = target_times['target'] From 23c77e6a753deb44ebfd612b8cbf182426f64dca Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Mon, 15 Jun 2020 15:27:23 +0200 Subject: [PATCH 042/171] Update docstrings for the release v0.2.1 --- docs/advanced_usage/concepts.md | 15 +- greenguard/pipeline.py | 19 ++- notebooks/1. GreenGuard Quickstart.ipynb | 200 +++++++++++++---------- 3 files changed, 138 insertions(+), 96 deletions(-) diff --git a/docs/advanced_usage/concepts.md b/docs/advanced_usage/concepts.md index 302d34a..16c6f97 100644 --- a/docs/advanced_usage/concepts.md +++ b/docs/advanced_usage/concepts.md @@ -47,10 +47,23 @@ which hyperparameters are more likely to get the best results in the next iterat We call each one of these evaluations a **tuning iteration**. +## Tuning Session + +We call tuning session to the [BTBSession]( +https://hdi-project.github.io/BTB/tutorials/03_Session.html) instance generated for a given +collection of templates and data. This tuning session searches for the best solution for the +tuning problem by performing tuning and selection over the given templates, evaluating wich +template to try next according to their previous score using a [Multi-armed Bandit]( +https://en.wikipedia.org/wiki/Multi-armed_bandit) aproach. + +The tuning session is in charge of discarding the templates that are not useful, updating the +best template to be used and it's hyperparameters that have generated the best score for the +given data. + ## GreenGuardPipeline This class is the one in charge of loading the **MLBlocks Pipelines** configured in the system and use them to learn from the data and make predictions. -This class is also responsible for tuning the pipeline hyperparameters using [BTB]( +This class is also responsible for creating the tuning session with [BTB]( https://hdi-project.github.io/BTB/) diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py index 8bcb218..f094d77 100644 --- a/greenguard/pipeline.py +++ b/greenguard/pipeline.py @@ -97,18 +97,22 @@ class GreenGuardPipeline(object): the tuning loop. Args: - template (str or MLPipeline): + templates (str, MLPipeline or list): Template to use. If a ``str`` is given, load the corresponding - ``MLPipeline``. + ``MLPipeline``. Also can be a list combining both. metric (str or function): Metric to use. If an ``str`` is give it must be one of the metrics defined in the ``greenguard.metrics.METRICS`` dictionary. cost (bool): Whether the metric is a cost function (the lower the better) or not. Defaults to ``False``. 
- init_params (dict): + init_params (dict or list): Initial parameters to pass to the underlying MLPipeline if something - other than the defaults need to be used. + other than the defaults need to be used. If a single dict is given + it will be used for all the templates. If is a list of dicts, those + will be matched by position with the templates. If a dict that has + as keys the name of the templates and as values a dict with init + params, those will be used for each corresponding template. Defaults to ``None``. stratify (bool): Whether to stratify the data when partitioning for cross validation. @@ -121,10 +125,13 @@ class GreenGuardPipeline(object): random_state (int or RandomState): random state to use for the cross validation partitioning. Defaults to ``0``. - preprocessing (int): + preprocessing (int, dict or list): Number of steps to execute during the preprocessing stage. The number of preprocessing steps cannot be higher than the - number of static steps in the given template. + number of static steps in the given template. If is a list of ints, + those will be matched by position with the templates. If a dict that + has as keys the name of the templates and as values a int those will be + used for each corresponding template. Defaults to ``0``. """ diff --git a/notebooks/1. GreenGuard Quickstart.ipynb b/notebooks/1. GreenGuard Quickstart.ipynb index 5415d8a..4fd08b8 100644 --- a/notebooks/1. GreenGuard Quickstart.ipynb +++ b/notebooks/1. GreenGuard Quickstart.ipynb @@ -14,9 +14,9 @@ "This notebook shows how to use GreenGuard to:\n", "\n", "- Load demo data\n", - "- Find available pipelines and load one as a template\n", - "- Tune the template arguments to generate the optimal pipeline\n", - "- Fit the pipeline to our data\n", + "- Find available pipelines and load two of them as templates\n", + "- Tune the templates to find the best template for the given data and its hyperparameters\n", + "- Fit the found pipeline to our data\n", "- Make predictions using the pipeline\n", "- Evaluate the goodness-of-fit" ] @@ -393,10 +393,10 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 3. Finding a Template\n", + "## 3. 
Finding the Templates\n", "\n", - "The next step will be to select a template from the ones available in\n", - "GreenGuard.\n", + "The next step will be to select a collection of templates from the ones\n", + "available in GreenGuard.\n", "\n", "For this, we can use the `greenguard.get_pipelines` function, which will\n", "return us the list of all the available MLBlocks pipelines found in the\n", @@ -476,9 +476,9 @@ { "data": { "text/plain": [ - "{'resample_600s_unstack_dfs_1d_xgb_classifier': '/app/greenguard/pipelines/resample_600s_unstack_dfs_1d_xgb_classifier.json',\n", - " 'resample_600s_normalize_dfs_1d_xgb_classifier': '/app/greenguard/pipelines/resample_600s_normalize_dfs_1d_xgb_classifier.json',\n", - " 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier': '/app/greenguard/pipelines/resample_600s_unstack_normalize_dfs_1d_xgb_classifier.json'}" + "{'resample_600s_unstack_dfs_1d_xgb_classifier': '/home/pacho/Projects/mit/GreenGuard/greenguard/pipelines/resample_600s_unstack_dfs_1d_xgb_classifier.json',\n", + " 'resample_600s_normalize_dfs_1d_xgb_classifier': '/home/pacho/Projects/mit/GreenGuard/greenguard/pipelines/resample_600s_normalize_dfs_1d_xgb_classifier.json',\n", + " 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier': '/home/pacho/Projects/mit/GreenGuard/greenguard/pipelines/resample_600s_unstack_normalize_dfs_1d_xgb_classifier.json'}" ] }, "execution_count": 12, @@ -494,8 +494,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "For the rest of this tutorial, we will select and use the pipeline\n", - "`resample_600s_unstack_normalize_dfs_1d_xgb_classifier` and `resample_600s_normalize_dfs_1d_xgb_classifier`.\n", + "For the rest of this tutorial, we will select and use the templates\n", + "`resample_600s_unstack_normalize_dfs_1d_xgb_classifier` and\n", + "`resample_600s_normalize_dfs_1d_xgb_classifier`.\n", "\n", "The `resample_600s_unstack_normalize_dfs_1d_xgb_classifier` template contains the following steps:\n", "\n", @@ -525,11 +526,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 3. Finding the best Pipeline\n", + "## 4. 
Finding the best Pipeline\n", "\n", "Once we have loaded the data, we create a **GreenGuardPipeline** instance by passing:\n", "\n", - "* `template (string)`: the name of a template or the path to a template json file.\n", + "* `templates (string or list)`: the name of a template, the path to a template json file or\n", + "a list that can combine both of them.\n", "* `metric (string or function)`: The name of the metric to use or a metric function to use.\n", "* `cost (bool)`: Whether the metric is a cost function to be minimized or a score to be maximized.\n", "\n", @@ -597,14 +599,14 @@ "output_type": "stream", "text": [ "Built 165 features\n", - "Elapsed: 01:40 | Progress: 100%|██████████\n", - "Elapsed: 00:53 | Progress: 100%|██████████\n", + "Elapsed: 00:41 | Progress: 100%|██████████\n", + "Elapsed: 00:18 | Progress: 100%|██████████\n", "Built 165 features\n", - "Elapsed: 01:37 | Progress: 100%|██████████\n", - "Elapsed: 00:45 | Progress: 100%|██████████\n", + "Elapsed: 00:37 | Progress: 100%|██████████\n", + "Elapsed: 00:18 | Progress: 100%|██████████\n", "Built 165 features\n", - "Elapsed: 01:52 | Progress: 100%|██████████\n", - "Elapsed: 00:58 | Progress: 100%|██████████\n" + "Elapsed: 00:37 | Progress: 100%|██████████\n", + "Elapsed: 00:18 | Progress: 100%|██████████\n" ] }, { @@ -620,7 +622,7 @@ " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.1\n", " ('xgboost.XGBClassifier#1', 'gamma'): 0.0\n", " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1\n", - "INFO:btb.session:New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.6013257575757575\n", + "INFO:btb.session:New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.6079987550575785\n", "INFO:btb.session:Obtaining default configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n" ] }, @@ -629,13 +631,13 @@ "output_type": "stream", "text": [ "Built 99 features\n", - "Elapsed: 05:21 | Progress: 100%|██████████\n", - "Elapsed: 02:31 | Progress: 100%|██████████\n", + "Elapsed: 02:06 | Progress: 100%|██████████\n", + "Elapsed: 01:02 | Progress: 100%|██████████\n", "Built 99 features\n", - "Elapsed: 02:37 | Progress: 100%|██████████\n", - "Elapsed: 01:14 | Progress: 100%|██████████\n", + "Elapsed: 01:53 | Progress: 100%|██████████\n", + "Elapsed: 00:54 | Progress: 100%|██████████\n", "Built 99 features\n", - "Elapsed: 02:17 | Progress: 100%|██████████\n", + "Elapsed: 01:55 | Progress: 100%|██████████\n", "Elapsed: 01:10 | Progress: 100%|██████████\n" ] }, @@ -647,30 +649,40 @@ "INFO:greenguard.pipeline:New configuration found:\n", " Template: resample_600s_unstack_normalize_dfs_1d_xgb_classifier \n", " Hyperparameters: \n", - " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 76\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 663\n", + " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 9\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 28\n", " ('xgboost.XGBClassifier#1', 'max_depth'): 4\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.6981330874338336\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.42260412740973985\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10\n", - "INFO:btb.session:New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.6636363636363637\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.3977560491030686\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.19143248884807773\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 8\n", + 
"INFO:btb.session:New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.6418782052584869\n", "INFO:btb.session:Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", - "INFO:btb.session:Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n" + "INFO:btb.session:Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", + "INFO:greenguard.pipeline:New configuration found:\n", + " Template: resample_600s_unstack_normalize_dfs_1d_xgb_classifier \n", + " Hyperparameters: \n", + " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 14\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 18\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 5\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.39294364912150626\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.3393295330438333\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 9\n", + "INFO:btb.session:New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.6671775409915827\n" ] }, { "data": { "text/plain": [ - "{'id': '73d39187c5c6ecfac03b05d407bf709d',\n", + "{'id': '2a494af25e2d986c9178fd47820d4b00',\n", " 'name': 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier',\n", " 'config': {('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 76,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 663,\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 4,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.6981330874338336,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.42260412740973985,\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10},\n", - " 'score': 0.6636363636363637}" + " 'max_labels'): 14,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 18,\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 5,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.39294364912150626,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.3393295330438333,\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 9},\n", + " 'score': 0.6671775409915827}" ] }, "execution_count": 16, @@ -698,16 +710,16 @@ { "data": { "text/plain": [ - "{'id': '73d39187c5c6ecfac03b05d407bf709d',\n", + "{'id': '2a494af25e2d986c9178fd47820d4b00',\n", " 'name': 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier',\n", " 'config': {('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 76,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 663,\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 4,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.6981330874338336,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.42260412740973985,\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10},\n", - " 'score': 0.6636363636363637}" + " 'max_labels'): 14,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 18,\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 5,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.39294364912150626,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.3393295330438333,\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 9},\n", + " 'score': 0.6671775409915827}" ] }, "execution_count": 17, @@ -723,7 +735,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "You can check that the new hyperparameters are already set by callgin `get_hyperparameters` method: " + "You can check that the new hyperparameters are already set by calling `get_hyperparameters` method: 
" ] }, { @@ -735,12 +747,12 @@ "data": { "text/plain": [ "{('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 76,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 663,\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 4,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.6981330874338336,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.42260412740973985,\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10}" + " 'max_labels'): 14,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 18,\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 5,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.39294364912150626,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.3393295330438333,\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 9}" ] }, "execution_count": 18, @@ -756,7 +768,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "We can check the template name that is used to generate the pipeline:" + "We can also check the template name that is used to generate the pipeline:" ] }, { @@ -795,7 +807,7 @@ { "data": { "text/plain": [ - "0.6636363636363637" + "0.6671775409915827" ] }, "execution_count": 20, @@ -812,7 +824,7 @@ "metadata": {}, "source": [ "**NOTE**: If the score is not good enough, we can call the `run` method of the `session` again,\n", - "specifying the amount of iterations, and this will continue its tuning process continuing from\n", + "specifying the amount of iterations, and this will resume its tuning process continuing from\n", "the previous results!" ] }, @@ -827,39 +839,39 @@ "text": [ "INFO:btb.session:Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", "INFO:btb.session:Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", + "INFO:greenguard.pipeline:New configuration found:\n", + " Template: resample_600s_unstack_normalize_dfs_1d_xgb_classifier \n", + " Hyperparameters: \n", + " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 99\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 143\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 9\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.06337107325877978\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.932864412690726\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10\n", + "INFO:btb.session:New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.6854149434794596\n", "INFO:btb.session:Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", "INFO:btb.session:Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", "INFO:btb.session:Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", "INFO:btb.session:Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", "INFO:btb.session:Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", "INFO:btb.session:Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", - "INFO:btb.session:Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", "INFO:btb.session:Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", - "INFO:greenguard.pipeline:New configuration found:\n", - " Template: resample_600s_unstack_normalize_dfs_1d_xgb_classifier \n", - " 
Hyperparameters: \n", - " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 17\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 880\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 3\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.3214711402471415\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.9330408960929772\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10\n", - "INFO:btb.session:New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.6807110281923715\n" + "INFO:btb.session:Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n" ] }, { "data": { "text/plain": [ - "{'id': 'f47187d007ea31262e087264580716c9',\n", + "{'id': '9999fcb9fdc53cf7bf8f1398cea07fab',\n", " 'name': 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier',\n", " 'config': {('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 17,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 880,\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 3,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.3214711402471415,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.9330408960929772,\n", + " 'max_labels'): 99,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 143,\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 9,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.06337107325877978,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.932864412690726,\n", " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10},\n", - " 'score': 0.6807110281923715}" + " 'score': 0.6854149434794596}" ] }, "execution_count": 21, @@ -879,7 +891,7 @@ { "data": { "text/plain": [ - "0.6807110281923715" + "0.6854149434794596" ] }, "execution_count": 22, @@ -900,11 +912,11 @@ "data": { "text/plain": [ "{('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 17,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 880,\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 3,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.3214711402471415,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.9330408960929772,\n", + " 'max_labels'): 99,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 143,\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 9,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.06337107325877978,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.932864412690726,\n", " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10}" ] }, @@ -921,7 +933,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 4. Fitting the pipeline\n", + "## 5. Fitting the pipeline\n", "\n", "Once we are satisfied with the obtained cross validation score, we can proceed to call\n", "the `fit` method passing again the same data elements.\n", @@ -940,7 +952,7 @@ "output_type": "stream", "text": [ "Built 165 features\n", - "Elapsed: 00:55 | Progress: 100%|██████████\n" + "Elapsed: 00:48 | Progress: 100%|██████████\n" ] } ], @@ -952,7 +964,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 5. Use the fitted pipeline\n", + "## 6. Use the fitted pipeline\n", "\n", "After fitting the pipeline, we are ready to make predictions on new data:" ] @@ -989,7 +1001,7 @@ { "data": { "text/plain": [ - "0.76" + "0.7346938775510203" ] }, "execution_count": 26, @@ -1007,7 +1019,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 6. Save and load the pipeline\n", + "## 7. 
Save and load the pipeline\n", "\n", "Since the tuning and fitting process takes time to execute and requires a lot of data, you\n", "will probably want to save a fitted instance and load it later to analyze new signals\n", @@ -1056,15 +1068,25 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Elapsed: 00:01 | Progress: 11%|█ " + "Elapsed: 00:19 | Progress: 100%|██████████\n" ] + }, + { + "data": { + "text/plain": [ + "array([0, 0, 0, 1, 0])" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ From 0b9c4dec5193f402604715a09f669a29e3b1fe1d Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Mon, 15 Jun 2020 16:26:50 +0200 Subject: [PATCH 043/171] Improve phrasing --- docs/advanced_usage/concepts.md | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) diff --git a/docs/advanced_usage/concepts.md b/docs/advanced_usage/concepts.md index 16c6f97..f39bffa 100644 --- a/docs/advanced_usage/concepts.md +++ b/docs/advanced_usage/concepts.md @@ -38,8 +38,8 @@ https://hdi-project.github.io/MLBlocks), or using the **GreenGuardPipeline**. ## Tuning -We call tuning the process of, given a dataset and a template, finding the pipeline derived from -the template that gets the best possible score on the dataset. +We call tuning the process of, given a dataset and a collection of templates, finding the pipeline +derived from the templates that gets the best possible score on the dataset. This process usually involves fitting and evaluating multiple pipelines with different hyperparameter configurations on the same data while using optimization algorithms to deduce @@ -47,23 +47,15 @@ which hyperparameters are more likely to get the best results in the next iterat We call each one of these evaluations a **tuning iteration**. -## Tuning Session - -We call tuning session to the [BTBSession]( -https://hdi-project.github.io/BTB/tutorials/03_Session.html) instance generated for a given -collection of templates and data. This tuning session searches for the best solution for the -tuning problem by performing tuning and selection over the given templates, evaluating wich -template to try next according to their previous score using a [Multi-armed Bandit]( -https://en.wikipedia.org/wiki/Multi-armed_bandit) aproach. - -The tuning session is in charge of discarding the templates that are not useful, updating the -best template to be used and it's hyperparameters that have generated the best score for the -given data. +The process of selecting and tuning the templates is handled by a [BTBSession]( +https://hdi-project.github.io/BTB/tutorials/03_Session.html), which is responsible for +discarding the templates that do not work on the given data and for keeping +track of the template and hyperparameters that obtain the best performance. ## GreenGuardPipeline This class is the one in charge of loading the **MLBlocks Pipelines** configured in the system and use them to learn from the data and make predictions. -This class is also responsible for creating the tuning session with [BTB]( -https://hdi-project.github.io/BTB/) +This class is also responsible for creating the BTBSession that will handle the +selection and tuning of the templates. From 2fe6bf69723f5f8b5a3390ac52ab8bd23ed370d7 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Mon, 15 Jun 2020 18:35:04 +0200 Subject: [PATCH 044/171] Anon path. --- notebooks/1. 
GreenGuard Quickstart.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/notebooks/1. GreenGuard Quickstart.ipynb b/notebooks/1. GreenGuard Quickstart.ipynb index 4fd08b8..ec7c0a7 100644 --- a/notebooks/1. GreenGuard Quickstart.ipynb +++ b/notebooks/1. GreenGuard Quickstart.ipynb @@ -476,9 +476,9 @@ { "data": { "text/plain": [ - "{'resample_600s_unstack_dfs_1d_xgb_classifier': '/home/pacho/Projects/mit/GreenGuard/greenguard/pipelines/resample_600s_unstack_dfs_1d_xgb_classifier.json',\n", - " 'resample_600s_normalize_dfs_1d_xgb_classifier': '/home/pacho/Projects/mit/GreenGuard/greenguard/pipelines/resample_600s_normalize_dfs_1d_xgb_classifier.json',\n", - " 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier': '/home/pacho/Projects/mit/GreenGuard/greenguard/pipelines/resample_600s_unstack_normalize_dfs_1d_xgb_classifier.json'}" + "{'resample_600s_unstack_dfs_1d_xgb_classifier': '/app/greenguard/pipelines/resample_600s_unstack_dfs_1d_xgb_classifier.json',\n", + " 'resample_600s_normalize_dfs_1d_xgb_classifier': '/app/greenguard/pipelines/resample_600s_normalize_dfs_1d_xgb_classifier.json',\n", + " 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier': '/app/greenguard/pipelines/resample_600s_unstack_normalize_dfs_1d_xgb_classifier.json'}" ] }, "execution_count": 12, From 76142d3ca35f3b5e39e310074dbde178bb4a282b Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Mon, 15 Jun 2020 21:54:11 +0200 Subject: [PATCH 045/171] Improve pipeline docstring --- greenguard/pipeline.py | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py index f094d77..135ebbd 100644 --- a/greenguard/pipeline.py +++ b/greenguard/pipeline.py @@ -107,12 +107,14 @@ class GreenGuardPipeline(object): Whether the metric is a cost function (the lower the better) or not. Defaults to ``False``. init_params (dict or list): - Initial parameters to pass to the underlying MLPipeline if something - other than the defaults need to be used. If a single dict is given - it will be used for all the templates. If is a list of dicts, those - will be matched by position with the templates. If a dict that has - as keys the name of the templates and as values a dict with init - params, those will be used for each corresponding template. + There are three possible values for init_params: + + * Init params ``dict``: It will be used for all templates. + * ``dict`` with the name of the template as a key and dictionary with its + init params. + * ``list``: each value will be assigned to the corresponding position of + self.templates. + Defaults to ``None``. stratify (bool): Whether to stratify the data when partitioning for cross validation. @@ -126,12 +128,14 @@ class GreenGuardPipeline(object): random state to use for the cross validation partitioning. Defaults to ``0``. preprocessing (int, dict or list): - Number of steps to execute during the preprocessing stage. - The number of preprocessing steps cannot be higher than the - number of static steps in the given template. If is a list of ints, - those will be matched by position with the templates. If a dict that - has as keys the name of the templates and as values a int those will be - used for each corresponding template. + There are three possible values for preprocessing: + + * ``int``: the value will be used for all templates. + * ``dict`` with the template name as a key and a number as a value, will + be used for that template. 
+ * ``list``: each value will be assigned to the corresponding position of
+ self.templates.
+
 Defaults to ``0``.
 """

From 58c75f2cb0d39760ef8a5fd0ed5f2b08cd3430ef Mon Sep 17 00:00:00 2001
From: Plamen Valentinov Kolev
Date: Tue, 16 Jun 2020 11:06:11 +0200
Subject: [PATCH 046/171] Add release notes for v0.2.1

---
 HISTORY.md | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/HISTORY.md b/HISTORY.md
index 90dec27..c5a9de0 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,5 +1,30 @@
 # History

+## 0.2.1 - 2020-06-16
+
+This release adds the possibility to specify more than one template when creating a
+GreenGuardPipeline. When its `tune` method is called, a BTBSession instance is returned,
+which is in charge of selecting the templates and tuning their hyperparameters until the
+best possible pipeline is found.
+
+### Internal Improvements
+
+* Resample by filename inside the `CSVLoader` to avoid oversampling of data that will not be used.
+* Select targets now allows them to be equal.
+* Fixed the csv filename format.
+* Upgraded to BTB.
+
+### Bug Fixes
+
+* Issue #33: Wrong default datetime format
+
+### Resolved Issues
+
+* Issue #35: Select targets is too strict
+* Issue #36: resample by filename inside csvloader
+* Issue #39: Upgrade BTB
+* Issue #41: Fix CSV filename format
+
 ## 0.2.0 - 2020-02-14

 First stable release:

From 43e9a3ee13dafa37541be2dbd876252c7996f7ee Mon Sep 17 00:00:00 2001
From: Plamen Valentinov Kolev
Date: Tue, 16 Jun 2020 11:06:15 +0200
Subject: [PATCH 047/171] =?UTF-8?q?Bump=20version:=200.2.1.dev1=20?=
 =?UTF-8?q?=E2=86=92=200.2.1?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 greenguard/__init__.py | 2 +-
 setup.cfg | 3 +--
 setup.py | 2 +-
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/greenguard/__init__.py b/greenguard/__init__.py
index df69d4a..db65211 100644
--- a/greenguard/__init__.py
+++ b/greenguard/__init__.py
@@ -4,7 +4,7 @@

 __author__ = """MIT Data To AI Lab"""
 __email__ = 'dailabmit@gmail.com'
-__version__ = '0.2.1.dev1'
+__version__ = '0.2.1'

 import os

diff --git a/setup.cfg b/setup.cfg
index f6734b1..aa01481 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.2.1.dev1
+current_version = 0.2.1
 commit = True
 tag = True
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))? 
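For reference, the `parse` expression above is what drives the version bumps in this patch: it splits a version string into its numbered parts plus an optional pre-release suffix. Below is a minimal sketch of how it behaves, assuming the standard `bumpversion` group names shown above:

```python
import re

# Same pattern as the setup.cfg `parse` option above.
PATTERN = re.compile(
    r'(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)'
    r'(\.(?P<release>[a-z]+)(?P<candidate>\d+))?'
)

# Development versions populate the optional pre-release groups:
print(PATTERN.match('0.2.1.dev1').groupdict())
# {'major': '0', 'minor': '2', 'patch': '1', 'release': 'dev', 'candidate': '1'}

# Stable releases leave them as None:
print(PATTERN.match('0.2.1').groupdict())
# {'major': '0', 'minor': '2', 'patch': '1', 'release': None, 'candidate': None}
```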
@@ -45,4 +45,3 @@ test = pytest [tool:pytest] collect_ignore = ['setup.py'] - diff --git a/setup.py b/setup.py index 89083b4..accc5d1 100644 --- a/setup.py +++ b/setup.py @@ -105,6 +105,6 @@ test_suite='tests', tests_require=tests_require, url='https://github.com/D3-AI/GreenGuard', - version='0.2.1.dev1', + version='0.2.1', zip_safe=False, ) From dba471652ad7e1833732e7531b91d7cb6698d8bd Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Tue, 16 Jun 2020 11:06:54 +0200 Subject: [PATCH 048/171] =?UTF-8?q?Bump=20version:=200.2.1=20=E2=86=92=200?= =?UTF-8?q?.2.2.dev0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- greenguard/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/greenguard/__init__.py b/greenguard/__init__.py index db65211..c9e61c2 100644 --- a/greenguard/__init__.py +++ b/greenguard/__init__.py @@ -4,7 +4,7 @@ __author__ = """MIT Data To AI Lab""" __email__ = 'dailabmit@gmail.com' -__version__ = '0.2.1' +__version__ = '0.2.2.dev0' import os diff --git a/setup.cfg b/setup.cfg index aa01481..7f91cdd 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.2.1 +current_version = 0.2.2.dev0 commit = True tag = True parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? diff --git a/setup.py b/setup.py index accc5d1..67516b5 100644 --- a/setup.py +++ b/setup.py @@ -105,6 +105,6 @@ test_suite='tests', tests_require=tests_require, url='https://github.com/D3-AI/GreenGuard', - version='0.2.1', + version='0.2.2.dev0', zip_safe=False, ) From fb289e8e89306fa31371171eb689cb8e1bc3d5db Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Mon, 22 Jun 2020 10:52:24 +0200 Subject: [PATCH 049/171] Rearrange docker --- .dockerignore | 2 - Makefile | 64 +++++++------ docker/.dockerignore | 2 + Dockerfile => docker/Dockerfile | 0 docker/Makefile | 45 ++++++++++ DOCKER.md => docker/README.md | 89 ++++++++++++++----- .../docker-compose.yml | 0 docker/greenguard-deployment.yml | 34 +++++++ 8 files changed, 187 insertions(+), 49 deletions(-) delete mode 100644 .dockerignore create mode 100644 docker/.dockerignore rename Dockerfile => docker/Dockerfile (100%) create mode 100644 docker/Makefile rename DOCKER.md => docker/README.md (50%) rename docker-compose.yml => docker/docker-compose.yml (100%) create mode 100644 docker/greenguard-deployment.yml diff --git a/.dockerignore b/.dockerignore deleted file mode 100644 index 7ea8e51..0000000 --- a/.dockerignore +++ /dev/null @@ -1,2 +0,0 @@ -notebooks-private/ -.tox/ diff --git a/Makefile b/Makefile index 7683786..32a5ba3 100644 --- a/Makefile +++ b/Makefile @@ -213,30 +213,40 @@ release-major: check-release bumpversion-major release # DOCKER TARGETS -.PHONY: docker-jupyter-clean -docker-jupyter-clean: ## Remove the greenguard-jupyter docker image - docker rmi -f greenguard-jupyter - -.PHONY: docker-jupyter-build -docker-jupyter-build: ## Build the greenguard-jupyter docker image using repo2docker - docker build -t greenguard-jupyter . 
- -.PHONY: docker-jupyter-save -docker-jupyter-save: docker-jupyter-build ## Build the greenguard-jupyter image and save it as greenguard-jupyter.tar - docker save --output greenguard-jupyter.tar greenguard-jupyter - -.PHONY: docker-jupyter-load -docker-jupyter-load: ## Load the greenguard-jupyter image from greenguard-jupyter.tar - docker load --input greenguard-jupyter.tar - -.PHONY: docker-jupyter-run -docker-jupyter-run: ## Run the greenguard-jupyter image in editable mode - docker run --rm -v $(shell pwd):/greenguard -ti -p8888:8888 --name greenguard-jupyter greenguard-jupyter - -.PHONY: docker-jupyter-start -docker-jupyter-start: ## Start the greenguard-jupyter image as a daemon - docker run --rm -d -v $(shell pwd):/greenguard -ti -p8888:8888 --name greenguard-jupyter greenguard-jupyter - -.PHONY: docker-jupyter-stop -docker-jupyter-stop: ## Stop the greenguard-jupyter daemon - docker stop greenguard-jupyter +.PHONY: docker-clean +docker-clean: ## Remove the greenguard docker image + docker rmi -f greenguard + +.PHONY: docker-build +docker-build: + docker build -f docker/Dockerfile -t greenguard . + +.PHONY: docker-save +docker-save: docker-build ## Build the greenguard image and save it as greenguard.tar + docker save --output greenguard.tar greenguard + +.PHONY: docker-load +docker-load: ## Load the greenguard image from greenguard.tar + docker load --input greenguard.tar + +.PHONY: docker-run +docker-run: ## Run the greenguard image in editable mode + docker run --rm -v $(shell pwd):/greenguard -ti -p8888:8888 --name greenguard greenguard + +.PHONY: docker-start +docker-start: ## Start the greenguard image as a daemon + docker run --rm -d -v $(shell pwd):/greenguard -ti -p8888:8888 --name greenguard greenguard + +.PHONY: docker-stop +docker-stop: ## Stop the greenguard daemon + docker stop greenguard + +.PHONY: docker-login +docker-login: + docker login + +.PHONY: docker-push +docker-push: docker-login docker-build + @$(eval VERSION := $(shell python -c 'import greenguard; print(greenguard.__version__)')) + docker tag greenguard signals-dev/greenguard:$(VERSION) + docker push signals-dev/greenguard:$(VERSION) diff --git a/docker/.dockerignore b/docker/.dockerignore new file mode 100644 index 0000000..a87abf0 --- /dev/null +++ b/docker/.dockerignore @@ -0,0 +1,2 @@ +../notebooks-private/ +../.tox/ diff --git a/Dockerfile b/docker/Dockerfile similarity index 100% rename from Dockerfile rename to docker/Dockerfile diff --git a/docker/Makefile b/docker/Makefile new file mode 100644 index 0000000..afc04f0 --- /dev/null +++ b/docker/Makefile @@ -0,0 +1,45 @@ +.DEFAULT_GOAL := help + +define BROWSER_PYSCRIPT +import os, webbrowser, sys + +try: + from urllib import pathname2url +except: + from urllib.request import pathname2url + +webbrowser.open("file://" + pathname2url(http://github.com/os.path.abspath(sys.argv[1]))) +endef +export BROWSER_PYSCRIPT + +define PRINT_HELP_PYSCRIPT +import re, sys + +for line in sys.stdin: + match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) + if match: + target, help = match.groups() + print("%-20s %s" % (target, help)) +endef +export PRINT_HELP_PYSCRIPT + +BROWSER := python -c "$$BROWSER_PYSCRIPT" + +.PHONY: help +help: + @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) + +# DOCKER TARGET +.PHONY: docker-login +docker-login: + docker login + +.PHONY: docker-build +docker-build: + docker build -t greenguard-jupyter . 
+ +.PHONY: docker-push +docker-push: docker-login docker-build + @$(eval VERSION := $(shell python -c 'import greenguard; print(greenguard.__version__)')) + docker tag greenguard-jupyter signals-dev/greenguard:$(VERSION) + docker push signals-dev/greenguard:$(VERSION) diff --git a/DOCKER.md b/docker/README.md similarity index 50% rename from DOCKER.md rename to docker/README.md index 91d1a28..19c12b3 100644 --- a/DOCKER.md +++ b/docker/README.md @@ -9,36 +9,38 @@ installed. The only requirement in order to run the GreenGuard Docker image is to have Docker installed and that the user has enough permissions to run it. -Installation instructions for any possible system compatible can be found [here](https://docs.docker.com/install/) +Installation instructions for any possible system compatible can be found [here]( +https://docs.docker.com/install/). Additionally, the system that builds the GreenGuard Docker image will also need to have a working internet connection that allows downloading the base image and the additional python depenedencies. ## Building the GreenGuard Docker Image -After having cloned the **GreenGuard** repository, all you have to do in order to build the GreenGuard Docker -Image is running this command: +After having cloned the **GreenGuard** repository, all you have to do in order to build the +GreenGuard Docker Image is running this command: ```bash -make docker-jupyter-build +make docker-build ``` -After a few minutes, the new image, called `greenguard-jupyter`, will have been built into the system +After a few minutes, the new image, called `greenguard`, will have been built into the system and will be ready to be used or distributed. ## Distributing the GreenGuard Docker Image -Once the `greenguard-jupyter` image is built, it can be distributed in several ways. +Once the `greenguard` image is built, it can be distributed in several ways. ### Distributing using a Docker registry -The simplest way to distribute the recently created image is [using a registry](https://docs.docker.com/registry/). +The simplest way to distribute the recently created image is [using a registry]( +https://docs.docker.com/registry/). In order to do so, we will need to have write access to a public or private registry (remember to [login](https://docs.docker.com/engine/reference/commandline/login/)!) and execute these commands: ```bash -docker tag greenguard-jupyter:latest your-registry-name:some-tag +docker tag greenguard:latest your-registry-name:some-tag docker push your-registry-name:some-tag ``` @@ -46,7 +48,7 @@ Afterwards, in the receiving machine: ```bash docker pull your-registry-name:some-tag -docker tag your-registry-name:some-tag greenguard-jupyter:latest +docker tag your-registry-name:some-tag greenguard:latest ``` ### Distributing as a file @@ -57,32 +59,32 @@ using the following command. In the system that already has the image: ```bash -docker save --output greenguard-jupyter.tar greenguard-jupyter +docker save --output greenguard.tar greenguard ``` -Then copy over the file `greenguard-jupyter.tar` to the new system and there, run: +Then copy over the file `greenguard.tar` to the new system and there, run: ```bash -docker load --input greenguard-jupyter.tar +docker load --input greenguard.tar ``` -After these commands, the `greenguard-jupyter` image should be available and ready to be used in the +After these commands, the `greenguard` image should be available and ready to be used in the new system. 
-## Running the greenguard-jupyter image +## Running the greenguard image -Once the `greenguard-jupyter` image has been built, pulled or loaded, it is ready to be run. +Once the `greenguard` image has been built, pulled or loaded, it is ready to be run. This can be done in two ways: -### Running greenguard-jupyter with the code +### Running greenguard with the code If the GreenGuard source code is available in the system, running the image is as simple as running this command from within the root of the project: ```bash -make docker-jupyter-run +make docker-run ``` This will start a jupyter notebook using the docker image, which you can access by pointing your @@ -93,20 +95,67 @@ which means that any changes that you do in your local code will immediately be within your notebooks, and that any notebook that you create within jupyter will also show up in your `notebooks` folder! -### Running greenguard-jupyter without the greenguard code +### Running greenguard without the greenguard code If the GreenGuard source code is not available in the system and only the Docker Image is, you can still run the image by using this command: ```bash -docker run -ti -p 8888:8888 greenguard-jupyter +docker run -ti -p 8888:8888 greenguard ``` In this case, the code changes and the notebooks that you create within jupyter will stay inside the container and you will only be able to access and download them through the jupyter interface. +## Running the greenguard image on kubernetes + +### Running as pod + +There is a possiblity to run GreenGuard's docker image on a local kubernetes cluster. Once you have +created the docker image (locally or remotely) and you have [kubernetes]( +https://kubernetes.io/docs/home/) properly setup at your local environment, copy and paste the +following pod configuration into a `yml` file: + +```yml +apiVersion: v1 +kind: Pod +metadata: + name: greenguard +spec: + containers: + - name: greenguard + image: signals-dev/greenguard-jupyter:0.2.2.dev0 + ports: + - containerPort: 8888 +``` + +**Note** If you would like to use your local image that you created previously, or an image +from another repository that's not the official one, change the `image` value to the one that +corresponds to yours. + +Once you have created the `yml` file, you can run the following command to launch the pod: + +```bash +kubectl apply -f file.yml +``` + +This will create a pod named `greenguard` and in order to access it, we will have to forward +the port 8888 from the pod to our localhost. To do so, just run the following command: + +```bash +kubectl port-forward greeguard 8888 +``` + +Finally we can point our browser to http://localhost:8888 and use the GreenGuard software. + +### Running GreenGuard a service + +Kubernetes allows the posibility to run a docker image as a services, inside this folder you +will find a `greenguard-deployment.yml` file, ready to use as an deployment service, which has +the port forwarded to the `30088`. You can use this template to adapt it to your needs. + ## What's next? For more details about **GreenGuard** and all its possibilities and features, please check the -[project documentation site](https://D3-AI.github.io/GreenGuard/)! +[project documentation site](https://signals-dev.github.io/GreenGuard/)! 
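Following up on the `greenguard-deployment.yml` template described in the section above, this is one possible way to deploy it and reach the exposed NodePort. It is only a sketch: it assumes a local cluster where NodePort services are reachable, and the actual node IP depends on your setup.

```bash
# Create the Deployment and the Service defined in the template
kubectl apply -f docker/greenguard-deployment.yml

# Confirm that the service exposes Jupyter on NodePort 30088
kubectl get service greenguard-jupyter

# Then point your browser at http://<node-ip>:30088
```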
diff --git a/docker-compose.yml b/docker/docker-compose.yml similarity index 100% rename from docker-compose.yml rename to docker/docker-compose.yml diff --git a/docker/greenguard-deployment.yml b/docker/greenguard-deployment.yml new file mode 100644 index 0000000..a51139a --- /dev/null +++ b/docker/greenguard-deployment.yml @@ -0,0 +1,34 @@ +apiVersion: v1 +kind: Service +metadata: + name: greenguard-jupyter +spec: + ports: + - name: jupyter + port: 8888 + nodePort: 30088 + selector: + app: greenguard-jupyter + type: NodePort +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: greenguard-jupyter +spec: + selector: + matchLabels: + app: greenguard-jupyter + strategy: + type: Recreate + template: + metadata: + labels: + app: greenguard-jupyter + spec: + containers: + - image: pvkdev/greenguard-jupyter:0.2.2.dev0 + name: greenguard-jupyter + ports: + - containerPort: 8888 + name: jupyter From 859627746456a12a10a280de4b71f9365bd8b93f Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev <41479552+pvk-developer@users.noreply.github.com> Date: Mon, 22 Jun 2020 10:53:05 +0200 Subject: [PATCH 050/171] Delete Makefile --- docker/Makefile | 45 --------------------------------------------- 1 file changed, 45 deletions(-) delete mode 100644 docker/Makefile diff --git a/docker/Makefile b/docker/Makefile deleted file mode 100644 index afc04f0..0000000 --- a/docker/Makefile +++ /dev/null @@ -1,45 +0,0 @@ -.DEFAULT_GOAL := help - -define BROWSER_PYSCRIPT -import os, webbrowser, sys - -try: - from urllib import pathname2url -except: - from urllib.request import pathname2url - -webbrowser.open("file://" + pathname2url(http://github.com/os.path.abspath(sys.argv[1]))) -endef -export BROWSER_PYSCRIPT - -define PRINT_HELP_PYSCRIPT -import re, sys - -for line in sys.stdin: - match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line) - if match: - target, help = match.groups() - print("%-20s %s" % (target, help)) -endef -export PRINT_HELP_PYSCRIPT - -BROWSER := python -c "$$BROWSER_PYSCRIPT" - -.PHONY: help -help: - @python -c "$$PRINT_HELP_PYSCRIPT" < $(MAKEFILE_LIST) - -# DOCKER TARGET -.PHONY: docker-login -docker-login: - docker login - -.PHONY: docker-build -docker-build: - docker build -t greenguard-jupyter . - -.PHONY: docker-push -docker-push: docker-login docker-build - @$(eval VERSION := $(shell python -c 'import greenguard; print(greenguard.__version__)')) - docker tag greenguard-jupyter signals-dev/greenguard:$(VERSION) - docker push signals-dev/greenguard:$(VERSION) From 5881709e524d4569fa2d99bc64f967f0a202706f Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Mon, 22 Jun 2020 13:02:17 +0200 Subject: [PATCH 051/171] Updated structure --- Makefile | 28 +------ README.md | 23 +----- docker/README.md | 160 ++++++++++---------------------------- docker/docker-compose.yml | 11 --- 4 files changed, 45 insertions(+), 177 deletions(-) delete mode 100644 docker/docker-compose.yml diff --git a/Makefile b/Makefile index 32a5ba3..96cfec3 100644 --- a/Makefile +++ b/Makefile @@ -213,34 +213,10 @@ release-major: check-release bumpversion-major release # DOCKER TARGETS -.PHONY: docker-clean -docker-clean: ## Remove the greenguard docker image - docker rmi -f greenguard - .PHONY: docker-build docker-build: docker build -f docker/Dockerfile -t greenguard . 
-.PHONY: docker-save -docker-save: docker-build ## Build the greenguard image and save it as greenguard.tar - docker save --output greenguard.tar greenguard - -.PHONY: docker-load -docker-load: ## Load the greenguard image from greenguard.tar - docker load --input greenguard.tar - -.PHONY: docker-run -docker-run: ## Run the greenguard image in editable mode - docker run --rm -v $(shell pwd):/greenguard -ti -p8888:8888 --name greenguard greenguard - -.PHONY: docker-start -docker-start: ## Start the greenguard image as a daemon - docker run --rm -d -v $(shell pwd):/greenguard -ti -p8888:8888 --name greenguard greenguard - -.PHONY: docker-stop -docker-stop: ## Stop the greenguard daemon - docker stop greenguard - .PHONY: docker-login docker-login: docker login @@ -248,5 +224,5 @@ docker-login: .PHONY: docker-push docker-push: docker-login docker-build @$(eval VERSION := $(shell python -c 'import greenguard; print(greenguard.__version__)')) - docker tag greenguard signals-dev/greenguard:$(VERSION) - docker push signals-dev/greenguard:$(VERSION) + docker tag greenguard signalsdev/greenguard:$(VERSION) + docker push signalsdev/greenguard:$(VERSION) diff --git a/README.md b/README.md index 7bb64d4..8a3c794 100644 --- a/README.md +++ b/README.md @@ -69,26 +69,9 @@ If you want to install from source or contribute to the project please read the ## Docker usage -Alternatively, **GreenGuard** is prepared to be run inside a docker environment using -`docker-compose`. - -For this, make sure to have both [docker](https://docs.docker.com/install/) and [docker-compose]( -https://docs.docker.com/compose/install/) installed on your system and then follow these steps: - -1. Clone this repository and go into the `GreenGuard` folder: - -```bash -git clone git@github.com:signals-dev/GreenGuard.git -cd GreenGuard -``` - -2. Start a Jupyter Notebook inside a docker container. - -```bash -docker-compose up --build -``` - -3. Point your browser at http://127.0.0.1:8888 +**GreenGuard** is prepared to be run inside a docker environment. Please check the +[docker documantation](https://github.com/signals-dev/GreenGuard/blob/master/docker/README.md) +about how run **GreenGuard** using docker. # Data Format diff --git a/docker/README.md b/docker/README.md index 19c12b3..564ca4c 100644 --- a/docker/README.md +++ b/docker/README.md @@ -1,160 +1,80 @@ -# Docker Usage +# Run GreenGuard using Docker -**GreenGuard** comes configured and ready to be distributed and run as a docker image which starts -a jupyter notebook already configured to use greenguard, with all the required dependencies already -installed. +GreenGuard is prepared to be run using [Docker](https://docker.com/). -## Docker Requirements - -The only requirement in order to run the GreenGuard Docker image is to have Docker installed and -that the user has enough permissions to run it. - -Installation instructions for any possible system compatible can be found [here]( -https://docs.docker.com/install/). - -Additionally, the system that builds the GreenGuard Docker image will also need to have a working -internet connection that allows downloading the base image and the additional python depenedencies. - -## Building the GreenGuard Docker Image - -After having cloned the **GreenGuard** repository, all you have to do in order to build the -GreenGuard Docker Image is running this command: +This is the command needed to start a Docker container locally that runs a [Jupyter Notebook]( +https://jupyter.org/) already configured to run GreenGuard. 
```bash -make docker-build +docker run -ti -p8888:8888 signals-dev/greenguard:latest ``` -After a few minutes, the new image, called `greenguard`, will have been built into the system -and will be ready to be used or distributed. - -## Distributing the GreenGuard Docker Image +Further details about the usage of this image can be found [here]( +https://hub.docker.com/repository/docker/signalsdev/greenguard). -Once the `greenguard` image is built, it can be distributed in several ways. +## Run GreenGuard on Kubernetes -### Distributing using a Docker registry +GreenGuard can also be started using [Kubernetes](https://kubernetes.io/). -The simplest way to distribute the recently created image is [using a registry]( -https://docs.docker.com/registry/). +Here are the minimum steps to do so: -In order to do so, we will need to have write access to a public or private registry (remember to -[login](https://docs.docker.com/engine/reference/commandline/login/)!) and execute these commands: +1. Create a POD yaml file with the these contents: -```bash -docker tag greenguard:latest your-registry-name:some-tag -docker push your-registry-name:some-tag -``` - -Afterwards, in the receiving machine: - -```bash -docker pull your-registry-name:some-tag -docker tag your-registry-name:some-tag greenguard:latest +```yml +apiVersion: v1 +kind: Pod +metadata: + name: greenguard +spec: + containers: + - name: greenguard + image: signalsdev/greenguard:latest + ports: + - containerPort: 8888 ``` -### Distributing as a file - -If the distribution of the image has to be done offline for any reason, it can be achieved -using the following command. - -In the system that already has the image: +2. Start the POD locally ```bash -docker save --output greenguard.tar greenguard +kubectl apply -f pod-file.yml ``` -Then copy over the file `greenguard.tar` to the new system and there, run: +3. Forward the port 8888 ```bash -docker load --input greenguard.tar +kubectl port-forward greenguard 8888 ``` -After these commands, the `greenguard` image should be available and ready to be used in the -new system. - - -## Running the greenguard image - -Once the `greenguard` image has been built, pulled or loaded, it is ready to be run. +4. Point your browser at http://localhost:8888 -This can be done in two ways: +On the other hand, if you are planing to run GreenGuard on a distributed service, we provided a +[template file]( +https://github.com/signals-dev/GreenGuard/blob/master/docker/greenguard-deployment.yml) +that you can use to achieve so. -### Running greenguard with the code +## Building the Docker image from scratch -If the GreenGuard source code is available in the system, running the image is as simple as running -this command from within the root of the project: +In order to build the Docker image from scratch you will need to: -```bash -make docker-run -``` - -This will start a jupyter notebook using the docker image, which you can access by pointing your -browser at http://127.0.0.1:8888 - -In this case, the local version of the project will also mounted within the Docker container, -which means that any changes that you do in your local code will immediately be available -within your notebooks, and that any notebook that you create within jupyter will also show -up in your `notebooks` folder! - -### Running greenguard without the greenguard code - -If the GreenGuard source code is not available in the system and only the Docker Image is, you can -still run the image by using this command: +1. 
Clone the repository ```bash -docker run -ti -p 8888:8888 greenguard +git clone git@github.com:signals-dev/GreenGuard.git +cd GreenGuard ``` -In this case, the code changes and the notebooks that you create within jupyter will stay -inside the container and you will only be able to access and download them through the -jupyter interface. - -## Running the greenguard image on kubernetes - -### Running as pod - -There is a possiblity to run GreenGuard's docker image on a local kubernetes cluster. Once you have -created the docker image (locally or remotely) and you have [kubernetes]( -https://kubernetes.io/docs/home/) properly setup at your local environment, copy and paste the -following pod configuration into a `yml` file: - -```yml -apiVersion: v1 -kind: Pod -metadata: - name: greenguard -spec: - containers: - - name: greenguard - image: signals-dev/greenguard-jupyter:0.2.2.dev0 - ports: - - containerPort: 8888 -``` - -**Note** If you would like to use your local image that you created previously, or an image -from another repository that's not the official one, change the `image` value to the one that -corresponds to yours. - -Once you have created the `yml` file, you can run the following command to launch the pod: +2. Build the docker image ```bash -kubectl apply -f file.yml +make docker-build ``` -This will create a pod named `greenguard` and in order to access it, we will have to forward -the port 8888 from the pod to our localhost. To do so, just run the following command: +3. If you are generating a new release, you can push to Docker hub using: ```bash -kubectl port-forward greeguard 8888 +make docker-push ``` -Finally we can point our browser to http://localhost:8888 and use the GreenGuard software. - -### Running GreenGuard a service - -Kubernetes allows the posibility to run a docker image as a services, inside this folder you -will find a `greenguard-deployment.yml` file, ready to use as an deployment service, which has -the port forwarded to the `30088`. You can use this template to adapt it to your needs. - ## What's next? For more details about **GreenGuard** and all its possibilities and features, please check the diff --git a/docker/docker-compose.yml b/docker/docker-compose.yml deleted file mode 100644 index dfb7aed..0000000 --- a/docker/docker-compose.yml +++ /dev/null @@ -1,11 +0,0 @@ -version: '3' -services: - jupyter: - build: - context: . - args: - - UID=${UID:-1000} - ports: - - "8888:8888" - volumes: - - .:/app From 5b7733018e1b255a60c50c3a21402b440c565ba4 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Tue, 23 Jun 2020 15:14:38 +0200 Subject: [PATCH 052/171] Fix docker hub link / version. --- README.md | 2 +- docker/README.md | 4 ++-- docker/greenguard-deployment.yml | 14 +++++++------- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index 8a3c794..88f2402 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ If you want to install from source or contribute to the project please read the **GreenGuard** is prepared to be run inside a docker environment. Please check the [docker documantation](https://github.com/signals-dev/GreenGuard/blob/master/docker/README.md) -about how run **GreenGuard** using docker. +about how to run **GreenGuard** using docker. 
# Data Format diff --git a/docker/README.md b/docker/README.md index 564ca4c..30232c7 100644 --- a/docker/README.md +++ b/docker/README.md @@ -6,7 +6,7 @@ This is the command needed to start a Docker container locally that runs a [Jupy https://jupyter.org/) already configured to run GreenGuard. ```bash -docker run -ti -p8888:8888 signals-dev/greenguard:latest +docker run -ti -p8888:8888 signals-dev/greenguard:0.2.2.dev0 ``` Further details about the usage of this image can be found [here]( @@ -28,7 +28,7 @@ metadata: spec: containers: - name: greenguard - image: signalsdev/greenguard:latest + image: signalsdev/greenguard:0.2.2.dev0 ports: - containerPort: 8888 ``` diff --git a/docker/greenguard-deployment.yml b/docker/greenguard-deployment.yml index a51139a..ff195ae 100644 --- a/docker/greenguard-deployment.yml +++ b/docker/greenguard-deployment.yml @@ -1,34 +1,34 @@ apiVersion: v1 kind: Service metadata: - name: greenguard-jupyter + name: greenguard spec: ports: - name: jupyter port: 8888 nodePort: 30088 selector: - app: greenguard-jupyter + app: greenguard type: NodePort --- apiVersion: apps/v1 kind: Deployment metadata: - name: greenguard-jupyter + name: greenguard spec: selector: matchLabels: - app: greenguard-jupyter + app: greenguard strategy: type: Recreate template: metadata: labels: - app: greenguard-jupyter + app: greenguard spec: containers: - - image: pvkdev/greenguard-jupyter:0.2.2.dev0 - name: greenguard-jupyter + - image: signalsdev/greenguard:0.2.2.dev0 + name: greenguard ports: - containerPort: 8888 name: jupyter From 0827312f80e3a92fd1d15f6dfe49ff94a2748a90 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Thu, 25 Jun 2020 10:12:27 +0200 Subject: [PATCH 053/171] Add relative link to docker readme. --- README.md | 2 +- docker/README.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 88f2402..bbb62f1 100644 --- a/README.md +++ b/README.md @@ -70,7 +70,7 @@ If you want to install from source or contribute to the project please read the ## Docker usage **GreenGuard** is prepared to be run inside a docker environment. Please check the -[docker documantation](https://github.com/signals-dev/GreenGuard/blob/master/docker/README.md) +[docker documantation](docker/README.md) about how to run **GreenGuard** using docker. # Data Format diff --git a/docker/README.md b/docker/README.md index 30232c7..6b15766 100644 --- a/docker/README.md +++ b/docker/README.md @@ -2,7 +2,7 @@ GreenGuard is prepared to be run using [Docker](https://docker.com/). -This is the command needed to start a Docker container locally that runs a [Jupyter Notebook]( +This are the commands needed to start a Docker container locally that runs a [Jupyter Notebook]( https://jupyter.org/) already configured to run GreenGuard. ```bash From fba74a8c592d2731bc7a4d92eca41b46a196f525 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Thu, 25 Jun 2020 13:44:23 +0200 Subject: [PATCH 054/171] Syntax --- docker/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/README.md b/docker/README.md index 6b15766..8706034 100644 --- a/docker/README.md +++ b/docker/README.md @@ -2,7 +2,7 @@ GreenGuard is prepared to be run using [Docker](https://docker.com/). -This are the commands needed to start a Docker container locally that runs a [Jupyter Notebook]( +These are the commands needed to start a Docker container locally that runs a [Jupyter Notebook]( https://jupyter.org/) already configured to run GreenGuard. 
```bash From a582f4beb41471cdf3cb8b1867bd03bd71ec798d Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Thu, 25 Jun 2020 15:19:45 +0200 Subject: [PATCH 055/171] Pr Review and comments --- Makefile | 3 +++ README.md | 3 +-- docker/README.md | 41 +++++++++++++++++++++++------------------ 3 files changed, 27 insertions(+), 20 deletions(-) diff --git a/Makefile b/Makefile index 96cfec3..b614603 100644 --- a/Makefile +++ b/Makefile @@ -226,3 +226,6 @@ docker-push: docker-login docker-build @$(eval VERSION := $(shell python -c 'import greenguard; print(greenguard.__version__)')) docker tag greenguard signalsdev/greenguard:$(VERSION) docker push signalsdev/greenguard:$(VERSION) + docker tag greenguard signalsdev/greenguard + docker push signalsdev/greenguard + diff --git a/README.md b/README.md index bbb62f1..14dd621 100644 --- a/README.md +++ b/README.md @@ -70,8 +70,7 @@ If you want to install from source or contribute to the project please read the ## Docker usage **GreenGuard** is prepared to be run inside a docker environment. Please check the -[docker documantation](docker/README.md) -about how to run **GreenGuard** using docker. +[docker documantation](docker/README.md) for details about how to run **GreenGuard** using docker. # Data Format diff --git a/docker/README.md b/docker/README.md index 8706034..631f877 100644 --- a/docker/README.md +++ b/docker/README.md @@ -6,9 +6,12 @@ These are the commands needed to start a Docker container locally that runs a [J https://jupyter.org/) already configured to run GreenGuard. ```bash -docker run -ti -p8888:8888 signals-dev/greenguard:0.2.2.dev0 +docker run -ti -p8888:8888 signalsdev/greenguard:latest ``` +This will start a Jupyter Notebook instance on your computer already configured to use GreenGuard. +You can access it by pointing your browser at http://127.0.0.1:8888 + Further details about the usage of this image can be found [here]( https://hub.docker.com/repository/docker/signalsdev/greenguard). @@ -16,9 +19,11 @@ https://hub.docker.com/repository/docker/signalsdev/greenguard). GreenGuard can also be started using [Kubernetes](https://kubernetes.io/). -Here are the minimum steps to do so: +Here are the minimum steps required to create a POD in a local Kubernetes cluster: + +1. Create a yaml file with these contents: -1. Create a POD yaml file with the these contents: +For this example, we are assuming that the yaml file is named `greegunard-pod.yml`. ```yml apiVersion: v1 @@ -28,33 +33,39 @@ metadata: spec: containers: - name: greenguard - image: signalsdev/greenguard:0.2.2.dev0 + image: signalsdev/greenguard:latest ports: - containerPort: 8888 ``` -2. Start the POD locally +2. Create a POD: + +After creating the yaml file, you can create a POD in your Kubernetes cluster using the `kubectl` +command: ```bash -kubectl apply -f pod-file.yml +kubectl apply -f greenguard-pod.yml ``` 3. Forward the port 8888 +After the POD is started, you still need to forward a local port to it in order to access the +Jupyter instance. + ```bash kubectl port-forward greenguard 8888 ``` 4. Point your browser at http://localhost:8888 -On the other hand, if you are planing to run GreenGuard on a distributed service, we provided a -[template file]( -https://github.com/signals-dev/GreenGuard/blob/master/docker/greenguard-deployment.yml) -that you can use to achieve so. +> **NOTE**: If GreenGuard is run in a production environment we recommend you to use a service and +a deployment instead of just a simple POD. 
You can find a template of this setup [here]( +greenguard-deployment.yml) ## Building the Docker image from scratch -In order to build the Docker image from scratch you will need to: +If you want to build the Docker image from scratch instead of using the dockerhub image +you will need to: 1. Clone the repository @@ -63,18 +74,12 @@ git clone git@github.com:signals-dev/GreenGuard.git cd GreenGuard ``` -2. Build the docker image +2. Build the docker image using the GreenGuard make command. ```bash make docker-build ``` -3. If you are generating a new release, you can push to Docker hub using: - -```bash -make docker-push -``` - ## What's next? For more details about **GreenGuard** and all its possibilities and features, please check the From 0479cb1a2fbe92118fb7d7a43510e3d952293241 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Thu, 25 Jun 2020 15:40:42 +0200 Subject: [PATCH 056/171] Update image version --- docker/greenguard-deployment.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/greenguard-deployment.yml b/docker/greenguard-deployment.yml index ff195ae..4736ce5 100644 --- a/docker/greenguard-deployment.yml +++ b/docker/greenguard-deployment.yml @@ -27,7 +27,7 @@ spec: app: greenguard spec: containers: - - image: signalsdev/greenguard:0.2.2.dev0 + - image: signalsdev/greenguard:latest name: greenguard ports: - containerPort: 8888 From 18be5ae009c7807d53fc2bcf313816111639ed1e Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Thu, 25 Jun 2020 17:45:02 +0200 Subject: [PATCH 057/171] Fix typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 14dd621..85ec4ee 100644 --- a/README.md +++ b/README.md @@ -70,7 +70,7 @@ If you want to install from source or contribute to the project please read the ## Docker usage **GreenGuard** is prepared to be run inside a docker environment. Please check the -[docker documantation](docker/README.md) for details about how to run **GreenGuard** using docker. +[docker documentation](docker/README.md) for details about how to run **GreenGuard** using docker. 
# Data Format From d84b83dfc74402b90a5740cd38bcf6f1d54ea910 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Fri, 3 Jul 2020 19:28:39 +0200 Subject: [PATCH 058/171] Allow saving splits on disk --- greenguard/pipeline.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py index 135ebbd..d97ec2a 100644 --- a/greenguard/pipeline.py +++ b/greenguard/pipeline.py @@ -3,6 +3,7 @@ import json import logging import os +import pickle from copy import deepcopy from hashlib import md5 @@ -307,15 +308,22 @@ def _generate_splits(self, template_name, target_times, readings, turbines=None) predict = pipeline.predict(X_test, output_=static - 1, start_=preprocessing, **context) - splits.append((fold, pipeline, fit, predict, y_test, static)) + os.makedirs('splits', exist_ok=True) + export_path = os.path.join('splits', '{}_{}.pkl'.format(template_name, fold)) + with open(export_path, 'wb') as split_file: + pickle.dump((fold, pipeline, fit, predict, y_test, static), split_file) + + splits.append(export_path) return splits def _cross_validate(self, template_splits, hyperparams): scores = [] - for fold, pipeline, fit, predict, y_test, static in template_splits: - LOGGER.debug('Scoring fold %s', fold) + for split_path in template_splits: + with open(split_path, 'rb') as split_file: + fold, pipeline, fit, predict, y_test, static = pickle.load(split_file) + LOGGER.debug('Scoring fold %s', fold) pipeline.set_hyperparameters(hyperparams) pipeline.fit(start_=static, **fit) predictions = pipeline.predict(start_=static, **predict) From 39c6c3d7f2f8441cec3820ed7c77613f101e469d Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Sun, 5 Jul 2020 19:51:52 +0200 Subject: [PATCH 059/171] Allow saving splits on disk --- greenguard/pipeline.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py index d97ec2a..e7c17a9 100644 --- a/greenguard/pipeline.py +++ b/greenguard/pipeline.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- +import gc import json import logging import os @@ -314,6 +315,8 @@ def _generate_splits(self, template_name, target_times, readings, turbines=None) pickle.dump((fold, pipeline, fit, predict, y_test, static), split_file) splits.append(export_path) + del fold, pipeline, fit, predict, y_test + gc.collect() return splits From 446b6da61ddcb2eafa68927242f02416158098e1 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Tue, 7 Jul 2020 09:53:57 +0200 Subject: [PATCH 060/171] Allow optionally to save splits on disk. --- greenguard/pipeline.py | 51 ++++++++++++++++++++++++++++-------------- 1 file changed, 34 insertions(+), 17 deletions(-) diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py index e7c17a9..288da57 100644 --- a/greenguard/pipeline.py +++ b/greenguard/pipeline.py @@ -139,6 +139,9 @@ class GreenGuardPipeline(object): self.templates. Defaults to ``0``. + cache_path (str): + If given, cache the generated cross validation splits in this folder. + Defatuls to ``None``. 
""" template = None @@ -231,7 +234,7 @@ def _build_pipeline(self): self.fitted = False def __init__(self, templates, metric='accuracy', cost=False, init_params=None, stratify=True, - cv_splits=5, shuffle=True, random_state=0, preprocessing=0): + cv_splits=5, shuffle=True, random_state=0, preprocessing=0, cache_path=None): if isinstance(metric, str): metric, cost = METRICS[metric] @@ -258,6 +261,9 @@ def __init__(self, templates, metric='accuracy', cost=False, init_params=None, s self._set_template(self._template_names[0]) self._hyperparameters = dict() self._build_pipeline() + self._cache_path = cache_path + if cache_path: + os.makedirs(cache_path, exist_ok=True) def get_hyperparameters(self): """Get the current hyperparameters. @@ -291,6 +297,8 @@ def _generate_splits(self, template_name, target_times, readings, turbines=None) turbines=turbines, output_=preprocessing - 1) del context['X'] del context['y'] + gc.collect() + else: context = { 'readings': readings, @@ -300,6 +308,7 @@ def _generate_splits(self, template_name, target_times, readings, turbines=None) splits = list() for fold, (train_index, test_index) in enumerate(self._cv.split(X, y)): LOGGER.debug('Running static steps for fold %s', fold) + gc.collect() X_train, X_test = X.iloc[train_index], X.iloc[test_index] y_train, y_test = y.iloc[train_index], y.iloc[test_index] @@ -309,22 +318,31 @@ def _generate_splits(self, template_name, target_times, readings, turbines=None) predict = pipeline.predict(X_test, output_=static - 1, start_=preprocessing, **context) - os.makedirs('splits', exist_ok=True) - export_path = os.path.join('splits', '{}_{}.pkl'.format(template_name, fold)) - with open(export_path, 'wb') as split_file: - pickle.dump((fold, pipeline, fit, predict, y_test, static), split_file) + split = (fold, pipeline, fit, predict, y_test, static) - splits.append(export_path) - del fold, pipeline, fit, predict, y_test - gc.collect() + if self._cache_path: + split_name = '{}_{}.pkl'.format(template_name, fold) + split_path = os.path.join(self._cache_path, split_name) + + with open(split_path, 'wb') as split_file: + pickle.dump(split, split_file) + + split = split_path + splits.append(split) + + gc.collect() return splits def _cross_validate(self, template_splits, hyperparams): scores = [] - for split_path in template_splits: - with open(split_path, 'rb') as split_file: - fold, pipeline, fit, predict, y_test, static = pickle.load(split_file) + for split in template_splits: + gc.collect() + if self._cache_path: + with open(split, 'rb') as split_file: + split = pickle.load(split_file) + + fold, pipeline, fit, predict, y_test, static = split LOGGER.debug('Scoring fold %s', fold) pipeline.set_hyperparameters(hyperparams) @@ -339,15 +357,14 @@ def _cross_validate(self, template_splits, hyperparams): def _make_btb_scorer(self, target_times, readings, turbines): splits = {} + for name in self._template_names: + splits[name] = self._generate_splits(name, target_times, readings, turbines) + + del target_times, readings, turbines + gc.collect() def scorer(template_name, config): template_splits = splits.get(template_name) - if template_splits is None: - template_splits = self._generate_splits( - template_name, target_times, readings, turbines) - - splits[template_name] = template_splits - cv_score = self._cross_validate(template_splits, config) if self._is_better(cv_score): _config = '\n'.join(' {}: {}'.format(n, v) for n, v in config.items()) From 8e49e9f56d2fa59a347c300c89dc01c1b9e8820e Mon Sep 17 00:00:00 2001 From: Plamen Valentinov 
Kolev
Date: Fri, 10 Jul 2020 11:16:02 +0200
Subject: [PATCH 061/171] Patch keras to be picklable on splits

---
 greenguard/pipeline.py | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py
index 288da57..8735e7b 100644
--- a/greenguard/pipeline.py
+++ b/greenguard/pipeline.py
@@ -5,15 +5,18 @@
 import logging
 import os
 import pickle
+import tempfile
 from copy import deepcopy
 from hashlib import md5

 import cloudpickle
+import keras
 import numpy as np
 from btb import BTBSession
 from btb.tuning import Tunable
 from mlblocks import MLPipeline
 from mlblocks.discovery import load_pipeline
+from mlprimitives.adapters.keras import Sequential
 from sklearn.exceptions import NotFittedError
 from sklearn.model_selection import KFold, StratifiedKFold

@@ -25,6 +28,32 @@
 PIPELINES_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), 'pipelines'))


+# Patch Keras so that Sequential adapter instances can be pickled, storing the model as HDF5 bytes
+def __getstate__(self):
+ state = self.__dict__.copy()
+ if 'model' in state:
+ with tempfile.NamedTemporaryFile(suffix='.hdf5', delete=True) as fd:
+ keras.models.save_model(state.pop('model'), fd.name, overwrite=True)
+ state['model_str'] = fd.read()
+
+ return state
+
+
+def __setstate__(self, state):
+ if 'model_str' in state:
+ with tempfile.NamedTemporaryFile(suffix='.hdf5', delete=True) as fd:
+ fd.write(state.pop('model_str'))
+ fd.flush()
+
+ state['model'] = keras.models.load_model(fd.name)
+
+ self.__dict__ = state
+
+
+Sequential.__getstate__ = __getstate__
+Sequential.__setstate__ = __setstate__
+
+
 def get_pipelines(pattern='', path=False, unstacked=False):

From cc20522a1c23198f88435dcf2fe0bd90d5360e77 Mon Sep 17 00:00:00 2001
From: Plamen Valentinov Kolev
Date: Fri, 10 Jul 2020 11:36:22 +0200
Subject: [PATCH 062/171] Add keras as a dependency, since we import it in our
 module to patch it.

---
 setup.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/setup.py b/setup.py
index 67516b5..c02e9a9 100644
--- a/setup.py
+++ b/setup.py
@@ -17,6 +17,7 @@

 install_requires = [
 'mlblocks>=0.3.4,<0.4',
+ 'Keras>=2.1.6,<2.4',
 'mlprimitives>=0.2.4,<0.3',
 'scipy>=1.0.1,<1.4.0',
 'baytune>=0.3.9,<0.4',

From eb3d5c9b68dcbe74c3efe798d3100c57f06de3a1 Mon Sep 17 00:00:00 2001
From: Carles Sala
Date: Fri, 10 Jul 2020 11:43:38 +0200
Subject: [PATCH 063/171] Move notebooks to tutorials

---
 .../01_GreenGuard_Machine_Learning.ipynb | 349 +++++++---------
 .../02_Extract_Readings.ipynb | 8 +-
 2 files changed, 63 insertions(+), 294 deletions(-)
 rename notebooks/1. GreenGuard Quickstart.ipynb => tutorials/01_GreenGuard_Machine_Learning.ipynb (63%)
 rename notebooks/2. Extract Readings.ipynb => tutorials/02_Extract_Readings.ipynb (99%)

diff --git a/notebooks/1. GreenGuard Quickstart.ipynb b/tutorials/01_GreenGuard_Machine_Learning.ipynb
index ec7c0a7..7738871 100644
--- a/notebooks/1. 
GreenGuard Quickstart.ipynb +++ b/tutorials/01_GreenGuard_Machine_Learning.ipynb @@ -4,21 +4,24 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# GreenGuard Quickstart" + "# GreenGuard Machine Learning" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "This notebook shows how to use GreenGuard to:\n", + "In this tutorial we will show you how to use GreenGuard to solve a Machine Learning problem\n", + "defined via a Target Times table.\n", "\n", - "- Load demo data\n", + "During the next steps we will:\n", + "\n", + "- Load demo target times and readings\n", "- Find available pipelines and load two of them as templates\n", - "- Tune the templates to find the best template for the given data and its hyperparameters\n", - "- Fit the found pipeline to our data\n", - "- Make predictions using the pipeline\n", - "- Evaluate the goodness-of-fit" + "- Use GreenGuard AutoML to select the best template and hyperparameters for our problem\n", + "- Build and fit a Machine Learning pipeline based on the found template and hyperparameters\n", + "- Make predictions using the fitted pipeline\n", + "- Evaluate how good the predictions are" ] }, { @@ -393,7 +396,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## 3. Finding the Templates\n", + "## 3. Finding the available Templates\n", "\n", "The next step will be to select a collection of templates from the ones\n", "available in GreenGuard.\n", @@ -411,13 +414,13 @@ { "data": { "text/plain": [ - "['resample_600s_unstack_144_lstm_timeseries_classifier',\n", + "['resample_600s_normalize_dfs_1d_xgb_classifier',\n", + " 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier',\n", + " 'resample_600s_unstack_double_144_lstm_timeseries_classifier',\n", " 'resample_3600s_unstack_24_lstm_timeseries_classifier',\n", - " 'resample_600s_unstack_dfs_1d_xgb_classifier',\n", - " 'resample_600s_normalize_dfs_1d_xgb_classifier',\n", " 'resample_3600s_unstack_double_24_lstm_timeseries_classifier',\n", - " 'resample_600s_unstack_double_144_lstm_timeseries_classifier',\n", - " 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier']" + " 'resample_600s_unstack_dfs_1d_xgb_classifier',\n", + " 'resample_600s_unstack_144_lstm_timeseries_classifier']" ] }, "execution_count": 10, @@ -446,9 +449,9 @@ { "data": { "text/plain": [ - "['resample_600s_unstack_dfs_1d_xgb_classifier',\n", - " 'resample_600s_normalize_dfs_1d_xgb_classifier',\n", - " 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier']" + "['resample_600s_normalize_dfs_1d_xgb_classifier',\n", + " 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier',\n", + " 'resample_600s_unstack_dfs_1d_xgb_classifier']" ] }, "execution_count": 11, @@ -476,9 +479,9 @@ { "data": { "text/plain": [ - "{'resample_600s_unstack_dfs_1d_xgb_classifier': '/app/greenguard/pipelines/resample_600s_unstack_dfs_1d_xgb_classifier.json',\n", - " 'resample_600s_normalize_dfs_1d_xgb_classifier': '/app/greenguard/pipelines/resample_600s_normalize_dfs_1d_xgb_classifier.json',\n", - " 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier': '/app/greenguard/pipelines/resample_600s_unstack_normalize_dfs_1d_xgb_classifier.json'}" + "{'resample_600s_normalize_dfs_1d_xgb_classifier': '/home/xals/Projects/MIT/GreenGuard/greenguard/pipelines/resample_600s_normalize_dfs_1d_xgb_classifier.json',\n", + " 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier': '/home/xals/Projects/MIT/GreenGuard/greenguard/pipelines/resample_600s_unstack_normalize_dfs_1d_xgb_classifier.json',\n", + " 
'resample_600s_unstack_dfs_1d_xgb_classifier': '/home/xals/Projects/MIT/GreenGuard/greenguard/pipelines/resample_600s_unstack_dfs_1d_xgb_classifier.json'}" ] }, "execution_count": 12, @@ -584,14 +587,14 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "INFO:btb.session:Obtaining default configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n" + "2020-07-10 11:39:49,290 - INFO - session - Obtaining default configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n" ] }, { @@ -599,21 +602,21 @@ "output_type": "stream", "text": [ "Built 165 features\n", - "Elapsed: 00:41 | Progress: 100%|██████████\n", - "Elapsed: 00:18 | Progress: 100%|██████████\n", + "Elapsed: 00:32 | Progress: 100%|██████████\n", + "Elapsed: 00:16 | Progress: 100%|██████████\n", "Built 165 features\n", - "Elapsed: 00:37 | Progress: 100%|██████████\n", - "Elapsed: 00:18 | Progress: 100%|██████████\n", + "Elapsed: 00:32 | Progress: 100%|██████████\n", + "Elapsed: 00:16 | Progress: 100%|██████████\n", "Built 165 features\n", - "Elapsed: 00:37 | Progress: 100%|██████████\n", - "Elapsed: 00:18 | Progress: 100%|██████████\n" + "Elapsed: 00:32 | Progress: 100%|██████████\n", + "Elapsed: 00:15 | Progress: 100%|██████████\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "INFO:greenguard.pipeline:New configuration found:\n", + "2020-07-10 11:42:19,561 - INFO - pipeline - New configuration found:\n", " Template: resample_600s_unstack_normalize_dfs_1d_xgb_classifier \n", " Hyperparameters: \n", " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 0\n", @@ -622,8 +625,8 @@ " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.1\n", " ('xgboost.XGBClassifier#1', 'gamma'): 0.0\n", " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1\n", - "INFO:btb.session:New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.6079987550575785\n", - "INFO:btb.session:Obtaining default configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n" + "2020-07-10 11:42:19,563 - INFO - session - New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.604136604136604\n", + "2020-07-10 11:42:19,565 - INFO - session - Obtaining default configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n" ] }, { @@ -631,63 +634,8 @@ "output_type": "stream", "text": [ "Built 99 features\n", - "Elapsed: 02:06 | Progress: 100%|██████████\n", - "Elapsed: 01:02 | Progress: 100%|██████████\n", - "Built 99 features\n", - "Elapsed: 01:53 | Progress: 100%|██████████\n", - "Elapsed: 00:54 | Progress: 100%|██████████\n", - "Built 99 features\n", - "Elapsed: 01:55 | Progress: 100%|██████████\n", - "Elapsed: 01:10 | Progress: 100%|██████████\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:btb.session:Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", - "INFO:greenguard.pipeline:New configuration found:\n", - " Template: resample_600s_unstack_normalize_dfs_1d_xgb_classifier \n", - " Hyperparameters: \n", - " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 9\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 28\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 4\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.3977560491030686\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.19143248884807773\n", - " 
('xgboost.XGBClassifier#1', 'min_child_weight'): 8\n", - "INFO:btb.session:New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.6418782052584869\n", - "INFO:btb.session:Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", - "INFO:btb.session:Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", - "INFO:greenguard.pipeline:New configuration found:\n", - " Template: resample_600s_unstack_normalize_dfs_1d_xgb_classifier \n", - " Hyperparameters: \n", - " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 14\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 18\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 5\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.39294364912150626\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.3393295330438333\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 9\n", - "INFO:btb.session:New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.6671775409915827\n" + "Elapsed: 00:45 | Progress: 53%|█████▎ " ] - }, - { - "data": { - "text/plain": [ - "{'id': '2a494af25e2d986c9178fd47820d4b00',\n", - " 'name': 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier',\n", - " 'config': {('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 14,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 18,\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 5,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.39294364912150626,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.3393295330438333,\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 9},\n", - " 'score': 0.6671775409915827}" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ @@ -704,29 +652,9 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'id': '2a494af25e2d986c9178fd47820d4b00',\n", - " 'name': 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier',\n", - " 'config': {('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 14,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 18,\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 5,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.39294364912150626,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.3393295330438333,\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 9},\n", - " 'score': 0.6671775409915827}" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "session.best_proposal" ] @@ -740,26 +668,9 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 14,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 18,\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 5,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.39294364912150626,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.3393295330438333,\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 9}" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "pipeline.get_hyperparameters()" ] @@ -773,20 +684,9 @@ }, { "cell_type": "code", - "execution_count": 19, + 
"execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'resample_600s_unstack_normalize_dfs_1d_xgb_classifier'" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "pipeline.template_name" ] @@ -801,20 +701,9 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.6671775409915827" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "pipeline.cv_score" ] @@ -830,101 +719,27 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:btb.session:Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", - "INFO:btb.session:Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", - "INFO:greenguard.pipeline:New configuration found:\n", - " Template: resample_600s_unstack_normalize_dfs_1d_xgb_classifier \n", - " Hyperparameters: \n", - " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 99\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 143\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 9\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.06337107325877978\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.932864412690726\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10\n", - "INFO:btb.session:New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.6854149434794596\n", - "INFO:btb.session:Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", - "INFO:btb.session:Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", - "INFO:btb.session:Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", - "INFO:btb.session:Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", - "INFO:btb.session:Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", - "INFO:btb.session:Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", - "INFO:btb.session:Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", - "INFO:btb.session:Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n" - ] - }, - { - "data": { - "text/plain": [ - "{'id': '9999fcb9fdc53cf7bf8f1398cea07fab',\n", - " 'name': 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier',\n", - " 'config': {('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 99,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 143,\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 9,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.06337107325877978,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.932864412690726,\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10},\n", - " 'score': 0.6854149434794596}" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "session.run(iterations=10)" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": {}, - "outputs": [ - { - 
"data": { - "text/plain": [ - "0.6854149434794596" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "pipeline.cv_score" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 99,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 143,\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 9,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.06337107325877978,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.932864412690726,\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10}" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "pipeline.get_hyperparameters()" ] @@ -944,18 +759,9 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Built 165 features\n", - "Elapsed: 00:48 | Progress: 100%|██████████\n" - ] - } - ], + "outputs": [], "source": [ "pipeline.fit(train, readings)" ] @@ -971,17 +777,9 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Elapsed: 00:17 | Progress: 100%|██████████\n" - ] - } - ], + "outputs": [], "source": [ "predictions = pipeline.predict(test, readings)" ] @@ -995,20 +793,9 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.7346938775510203" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "from sklearn.metrics import f1_score\n", "\n", @@ -1033,7 +820,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1052,7 +839,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -1068,27 +855,9 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Elapsed: 00:19 | Progress: 100%|██████████\n" - ] - }, - { - "data": { - "text/plain": [ - "array([0, 0, 0, 1, 0])" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "predictions = new_pipeline.predict(test, readings)\n", "predictions[0:5]" @@ -1111,7 +880,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.10" + "version": "3.6.9" } }, "nbformat": 4, diff --git a/notebooks/2. Extract Readings.ipynb b/tutorials/02_Extract_Readings.ipynb similarity index 99% rename from notebooks/2. Extract Readings.ipynb rename to tutorials/02_Extract_Readings.ipynb index db55927..a454648 100644 --- a/notebooks/2. 
Extract Readings.ipynb +++ b/tutorials/02_Extract_Readings.ipynb @@ -6,13 +6,13 @@ "source": [ "# Extract Readings\n", "\n", - "This notebook shows how to use the CSVLoader class to load the readings table from a folder\n", - "that contains readings in the raw CSV format.\n", + "In this tutorial we will show you how to use the CSVLoader class to load the readings table\n", + "from a folder that contains readings in the raw CSV format.\n", "\n", "The Raw CSV format es briefly explained below, but more details can be found in [the documentation site](\n", "https://signals-dev.github.io/GreenGuard/advanced_usage/csv.html)/n", "\n", - "In this notebook we will:\n", + "During the next steps we will:\n", "\n", "- Generate a folder with readings in the raw format based on the demo data\n", "- Explore the raw format\n", @@ -1366,7 +1366,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.10" + "version": "3.6.9" } }, "nbformat": 4, From f4c9a62ad451ad646ca33d40da58abf75e6c47eb Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Fri, 10 Jul 2020 11:47:09 +0200 Subject: [PATCH 064/171] Update repo config and add github actions --- .github/workflows/docs.yml | 29 ++++++++++++++ .github/workflows/tests.yml | 40 +++++++++++++++++++ .gitignore | 1 + .travis.yml | 18 ++------- Makefile | 78 ++++++++++++++++++++++++++++--------- docs/conf.py | 4 ++ docs/index.rst | 7 ++++ setup.py | 44 ++++++++++----------- tox.ini | 41 ++++++++----------- 9 files changed, 182 insertions(+), 80 deletions(-) create mode 100644 .github/workflows/docs.yml create mode 100644 .github/workflows/tests.yml diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml new file mode 100644 index 0000000..02e92fd --- /dev/null +++ b/.github/workflows/docs.yml @@ -0,0 +1,29 @@ +name: Generate Docs + +on: + push: + branches: [ master ] + +jobs: + + docs: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Python + uses: actions/setup-python@v1 + with: + python-version: '3.7' + + - name: Build + run: | + sudo apt install pandoc + python -m pip install --upgrade pip + pip install -e .[dev] + make docs + - name: Deploy + uses: peaceiris/actions-gh-pages@v3 + with: + github_token: ${{secrets.GITHUB_TOKEN}} + publish_dir: docs/_build/html diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 0000000..29c71ac --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,40 @@ +name: Run Tests + +on: + push: + branches: [ '*' ] + pull_request: + branches: [ master ] + +jobs: + build: + runs-on: ${{ matrix.os }} + strategy: + matrix: + python-version: [3.5, 3.6, 3.7] + os: [ubuntu-latest, macos-latest] + + steps: + - uses: actions/checkout@v1 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + + - if: matrix.os == 'ubuntu-latest' + name: Install graphviz - Ubuntu + run: | + sudo apt-get install pandoc + + - if: matrix.os == 'macos-latest' + name: Install graphviz - MacOS + run: | + brew install pandoc + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install tox tox-gh-actions + + - name: Test with tox + run: tox diff --git a/.gitignore b/.gitignore index fc59bb2..bcf1b75 100644 --- a/.gitignore +++ b/.gitignore @@ -65,6 +65,7 @@ instance/ # Sphinx documentation docs/_build/ docs/api/ +docs/tutorials/ # PyBuilder target/ diff --git a/.travis.yml b/.travis.yml index 9cbca5a..4cefe52 100644 --- 
a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,5 @@ # Config file for automatic testing at travis-ci.org -dist: xenial +dist: bionic language: python python: - 3.7 @@ -7,21 +7,11 @@ python: - 3.5 # Command to install dependencies -install: pip install -U tox-travis codecov +install: + - sudo apt-get install pandoc + - pip install -U tox-travis codecov after_success: codecov # Command to run tests script: tox - -deploy: - - - provider: pages - skip-cleanup: true - github-token: "$GITHUB_TOKEN" - keep-history: true - local-dir: docs/_build/html - target-branch: gh-pages - on: - branch: master - python: 3.6 diff --git a/Makefile b/Makefile index b614603..b489087 100644 --- a/Makefile +++ b/Makefile @@ -50,6 +50,7 @@ clean-pyc: ## remove Python file artifacts .PHONY: clean-docs clean-docs: ## remove previously built docs rm -f docs/api/*.rst + rm -rf docs/tutorials -$(MAKE) -C docs clean 2>/dev/null # this fails if sphinx is not yet installed .PHONY: clean-coverage @@ -91,24 +92,32 @@ lint: ## check style with flake8 and isort .PHONY: fix-lint fix-lint: ## fix lint issues using autoflake, autopep8, and isort - find greenguard -name '*.py' | xargs autoflake --in-place --remove-all-unused-imports --remove-unused-variables - autopep8 --in-place --recursive --aggressive greenguard - isort --apply --atomic --recursive greenguard - - find tests -name '*.py' | xargs autoflake --in-place --remove-all-unused-imports --remove-unused-variables - autopep8 --in-place --recursive --aggressive tests - isort --apply --atomic --recursive tests + find greenguard tests -name '*.py' | xargs autoflake --in-place --remove-all-unused-imports --remove-unused-variables + autopep8 --in-place --recursive --aggressive greenguard tests + isort --apply --atomic --recursive greenguard tests # TEST TARGETS -.PHONY: test -test: ## run tests quickly with the default Python +.PHONY: test-unit +test-unit: ## run tests quickly with the default Python python -m pytest --basetemp=${ENVTMPDIR} --cov=greenguard .PHONY: test-readme test-readme: ## run the readme snippets - rundoc run --single-session python3 -t python3 README.md + rm -rf tests/readme_test && mkdir tests/readme_test + cd tests/readme_test && rundoc run --single-session python3 -t python3 ../../README.md + rm -rf tests/readme_test + +.PHONY: test-tutorials +test-tutorials: ## run the tutorial notebooks + jupyter nbconvert --execute --ExecutePreprocessor.timeout=600 tutorials/*.ipynb --stdout > /dev/null + +.PHONY: test +test: test-unit test-readme ## test everything that needs test dependencies + +.PHONY: test-devel +test-devel: lint docs ## test everything that needs development dependencies .PHONY: test-all test-all: ## run tests on every Python version with tox @@ -126,6 +135,7 @@ coverage: ## check code coverage quickly with the default Python .PHONY: docs docs: clean-docs ## generate Sphinx HTML documentation, including API docs + cp -r tutorials docs/tutorials sphinx-apidoc --separate --no-toc -o docs/api/ greenguard $(MAKE) -C docs html @@ -146,12 +156,19 @@ dist: clean ## builds source and wheel package python setup.py bdist_wheel ls -l dist -.PHONY: test-publish -test-publish: dist ## package and upload a release on TestPyPI +.PHONY: publish-confirm +publish-confirm: + @echo "WARNING: This will irreversibly upload a new version to PyPI!" 
+ @echo -n "Please type 'confirm' to proceed: " \ + && read answer \ + && [ "$${answer}" = "confirm" ] + +.PHONY: publish-test +publish-test: dist publish-confirm ## package and upload a release on TestPyPI twine upload --repository-url https://test.pypi.org/legacy/ dist/* .PHONY: publish -publish: dist ## package and upload a release +publish: dist publish-confirm ## package and upload a release twine upload dist/* .PHONY: bumpversion-release @@ -161,6 +178,13 @@ bumpversion-release: ## Merge master to stable and bumpversion release bumpversion release git push --tags origin stable +.PHONY: bumpversion-release-test +bumpversion-release-test: ## Merge master to stable and bumpversion release + git checkout stable || git checkout -b stable + git merge --no-ff master -m"make release-tag: Merge branch 'master' into stable" + bumpversion release --no-tag + @echo git push --tags origin stable + .PHONY: bumpversion-patch bumpversion-patch: ## Merge stable to master and bumpversion patch git checkout master @@ -168,6 +192,10 @@ bumpversion-patch: ## Merge stable to master and bumpversion patch bumpversion --no-tag patch git push +.PHONY: bumpversion-candidate +bumpversion-candidate: ## Bump the version to the next candidate + bumpversion candidate --no-tag + .PHONY: bumpversion-minor bumpversion-minor: ## Bump the version the next minor skipping the release bumpversion --no-tag minor @@ -176,13 +204,21 @@ bumpversion-minor: ## Bump the version the next minor skipping the release bumpversion-major: ## Bump the version the next major skipping the release bumpversion --no-tag major -.PHONY: bumpversion-candidate -bumpversion-candidate: ## Bump the version to the next candidate - bumpversion candidate --no-tag +.PHONY: bumpversion-revert +bumpversion-revert: ## Undo a previous bumpversion-release + git checkout master + git branch -D stable +CLEAN_DIR := $(shell git status --short | grep -v ??) CURRENT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null) CHANGELOG_LINES := $(shell git diff HEAD..origin/stable HISTORY.md 2>&1 | wc -l) +.PHONY: check-clean +check-clean: ## Check if the directory has uncommitted changes +ifneq ($(CLEAN_DIR),) + $(error There are uncommitted changes) +endif + .PHONY: check-master check-master: ## Check if we are in master branch ifneq ($(CURRENT_BRANCH),master) @@ -196,14 +232,21 @@ ifeq ($(CHANGELOG_LINES),0) endif .PHONY: check-release -check-release: check-master check-history ## Check if the release can be made +check-release: check-clean check-master check-history ## Check if the release can be made + @echo "A new release can be made" .PHONY: release release: check-release bumpversion-release publish bumpversion-patch +.PHONY: release-test +release-test: check-release bumpversion-release-test publish-test bumpversion-revert + .PHONY: release-candidate release-candidate: check-master publish bumpversion-candidate +.PHONY: release-candidate-test +release-candidate-test: check-clean check-master publish-test + .PHONY: release-minor release-minor: check-release bumpversion-minor release @@ -228,4 +271,3 @@ docker-push: docker-login docker-build docker push signalsdev/greenguard:$(VERSION) docker tag greenguard signalsdev/greenguard docker push signalsdev/greenguard - diff --git a/docs/conf.py b/docs/conf.py index 733e1a9..9e23c07 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -32,6 +32,7 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. 
 extensions = [
     'm2r',
+    'nbsphinx',
     'sphinx.ext.autodoc',
     'sphinx.ext.githubpages',
     'sphinx.ext.viewcode',
@@ -53,6 +54,9 @@
 # The master toctree document.
 master_doc = 'index'
 
+# Jupyter Notebooks
+nbsphinx_execute = 'never'
+
 # General information about the project.
 project = 'GreenGuard'
 slug = 'greenguard'
diff --git a/docs/index.rst b/docs/index.rst
index a654f0e..dad6c5f 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -6,6 +6,13 @@
 Overview
 
+.. toctree::
+    :caption: Tutorials
+    :hidden:
+
+    tutorials/01_GreenGuard_Machine_Learning
+    tutorials/02_Extract_Readings
+
 .. toctree::
     :caption: Advanced Usage
     :hidden:
diff --git a/setup.py b/setup.py
index 67516b5..51f9880 100644
--- a/setup.py
+++ b/setup.py
@@ -4,13 +4,13 @@
 from setuptools import setup, find_packages
 
 try:
-    with open('README.md') as readme_file:
+    with open('README.md', encoding='utf-8') as readme_file:
         readme = readme_file.read()
 except IOError:
     readme = ''
 
 try:
-    with open('HISTORY.md') as history_file:
+    with open('HISTORY.md', encoding='utf-8') as history_file:
         history = history_file.read()
 except IOError:
     history = ''
@@ -33,44 +33,42 @@
 tests_require = [
     'pytest>=3.4.2',
     'pytest-cov>=2.6.0',
-    'rundoc>=0.4.3'
+    'jupyter>=1.0.0,<2',
+    'rundoc>=0.4.3,<0.5',
 ]
 
 development_requires = [
     # general
-    'bumpversion>=0.5.3',
+    'bumpversion>=0.5.3,<0.6',
     'pip>=9.0.1',
-    'watchdog>=0.8.3',
+    'watchdog>=0.8.3,<0.11',
 
     # docs
-    'm2r>=0.2.0',
-    'Sphinx>=1.7.1,<2.4',
-    'sphinx_rtd_theme>=0.2.4',
+    'm2r>=0.2.0,<0.3',
+    'nbsphinx>=0.5.0,<0.7',
+    'Sphinx>=1.7.1,<3',
+    'sphinx_rtd_theme>=0.2.4,<0.5',
     'autodocsumm>=0.1.10',
 
     # style check
-    'flake8>=3.7.7',
-    'isort>=4.3.4',
+    'flake8>=3.7.7,<4',
+    'isort>=4.3.4,<5',
 
     # fix style issues
-    'autoflake>=1.2',
-    'autopep8>=1.4.3',
+    'autoflake>=1.1,<2',
+    'autopep8>=1.4.3,<2',
 
     # distribute on PyPI
-    'twine>=1.10.0',
+    'twine>=1.10.0,<4',
     'wheel>=0.30.0',
 
     # Advanced testing
-    'coverage>=4.5.1',
-    'tox>=2.9.1',
-
-    # Jupyter
-    'jupyter>=1.0.0',
+    'coverage>=4.5.1,<6',
+    'tox>=2.9.1,<4',
 ]
 
-
 setup(
-    author="MIT Data To AI Lab",
+    author='MIT Data To AI Lab',
     author_email='dailabmit@gmail.com',
     classifiers=[
         'Development Status :: 2 - Pre-Alpha',
@@ -82,7 +80,7 @@
         'Programming Language :: Python :: 3.6',
         'Programming Language :: Python :: 3.7',
     ],
-    description="AutoML for Renewable Energy Industries.",
+    description='AutoML for Renewable Energy Industries.',
     entry_points={
         'mlblocks': [
             'pipelines=greenguard:MLBLOCKS_PIPELINES'
@@ -95,12 +93,12 @@
     include_package_data=True,
     install_requires=install_requires,
     keywords='wind machine learning greenguard',
-    license="MIT license",
+    license='MIT license',
     long_description=readme + '\n\n' + history,
     long_description_content_type='text/markdown',
     name='greenguard',
     packages=find_packages(include=['greenguard', 'greenguard.*']),
-    python_requires='>=3.5',
+    python_requires='>=3.5,<3.8',
     setup_requires=setup_requires,
     test_suite='tests',
     tests_require=tests_require,
diff --git a/tox.ini b/tox.ini
index de5cd07..31724c5 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,38 +1,29 @@
 [tox]
-envlist = py35, py36, py37, lint, docs, readme
-
+envlist = py{35,36,37}, test-devel
 
 [travis]
 python =
-    3.7: py37
-    3.6: py36, docs, lint, readme
+    3.7: py37, test-devel
+    3.6: py36
     3.5: py35
 
+[gh-actions]
+python =
+    3.7: py37, test-devel
+    3.6: py36
+    3.5: py35
+
 [testenv]
 passenv = CI TRAVIS TRAVIS_*
-setenv =
-    PYTHONPATH = {toxinidir}
-extras = test
-commands =
-    /usr/bin/env make test
-
-
-[testenv:lint]
 skipsdist = true
-extras = dev
+skip_install = true
+commands_pre =
+
/usr/bin/env pip install .[test] commands = - /usr/bin/env make lint - - -[testenv:docs] -skipsdist = true -extras = dev -commands = - /usr/bin/env make docs - + /usr/bin/env make test -[testenv:readme] -skipsdist = true +[testenv:test-devel] +commands_pre = + /usr/bin/env pip install .[dev] commands = - /usr/bin/env make test-readme + /usr/bin/env make test-devel From 17bd968535ddded981bbfd0129971b753b35f26a Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Fri, 10 Jul 2020 11:51:04 +0200 Subject: [PATCH 065/171] Update tutorial --- .gitignore | 1 + .../01_GreenGuard_Machine_Learning.ipynb | 286 ++++++++++++++++-- 2 files changed, 261 insertions(+), 26 deletions(-) diff --git a/.gitignore b/.gitignore index bcf1b75..f0a4be1 100644 --- a/.gitignore +++ b/.gitignore @@ -111,3 +111,4 @@ notebooks/ notebooks-private/ scripts/ dask-worker-space/ +tutorials/*.pkl diff --git a/tutorials/01_GreenGuard_Machine_Learning.ipynb b/tutorials/01_GreenGuard_Machine_Learning.ipynb index 7738871..e17f0b2 100644 --- a/tutorials/01_GreenGuard_Machine_Learning.ipynb +++ b/tutorials/01_GreenGuard_Machine_Learning.ipynb @@ -587,7 +587,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -634,8 +634,53 @@ "output_type": "stream", "text": [ "Built 99 features\n", - "Elapsed: 00:45 | Progress: 53%|█████▎ " + "Elapsed: 01:28 | Progress: 100%|██████████\n", + "Elapsed: 00:45 | Progress: 100%|██████████\n", + "Built 99 features\n", + "Elapsed: 01:29 | Progress: 100%|██████████\n", + "Elapsed: 00:47 | Progress: 100%|██████████\n", + "Built 99 features\n", + "Elapsed: 01:32 | Progress: 100%|██████████\n", + "Elapsed: 00:48 | Progress: 100%|██████████\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2020-07-10 11:49:21,971 - INFO - session - Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", + "2020-07-10 11:49:22,446 - INFO - session - Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", + "2020-07-10 11:49:22,682 - INFO - session - Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", + "2020-07-10 11:49:22,862 - INFO - pipeline - New configuration found:\n", + " Template: resample_600s_unstack_normalize_dfs_1d_xgb_classifier \n", + " Hyperparameters: \n", + " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 16\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 82\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 3\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.39699298238763425\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.06238180737748478\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1\n", + "2020-07-10 11:49:22,864 - INFO - session - New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.6110894266631971\n" ] + }, + { + "data": { + "text/plain": [ + "{'id': '6cbe94178d761b5c263dc2f7ab1f8205',\n", + " 'name': 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier',\n", + " 'config': {('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", + " 'max_labels'): 16,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 82,\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 3,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.39699298238763425,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.06238180737748478,\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1},\n", + " 'score': 0.6110894266631971}" + 
] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -652,9 +697,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{'id': '6cbe94178d761b5c263dc2f7ab1f8205',\n", + " 'name': 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier',\n", + " 'config': {('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", + " 'max_labels'): 16,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 82,\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 3,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.39699298238763425,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.06238180737748478,\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1},\n", + " 'score': 0.6110894266631971}" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "session.best_proposal" ] @@ -668,9 +733,26 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", + " 'max_labels'): 16,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 82,\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 3,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.39699298238763425,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.06238180737748478,\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1}" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pipeline.get_hyperparameters()" ] @@ -684,9 +766,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'resample_600s_unstack_normalize_dfs_1d_xgb_classifier'" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pipeline.template_name" ] @@ -701,9 +794,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.6110894266631971" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pipeline.cv_score" ] @@ -719,27 +823,111 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2020-07-10 11:49:22,952 - INFO - session - Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", + "2020-07-10 11:49:23,246 - INFO - session - Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", + "2020-07-10 11:49:23,464 - INFO - session - Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", + "2020-07-10 11:49:23,668 - INFO - session - Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", + "2020-07-10 11:49:23,791 - INFO - pipeline - New configuration found:\n", + " Template: resample_600s_unstack_normalize_dfs_1d_xgb_classifier \n", + " Hyperparameters: \n", + " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 80\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 31\n", + " 
('xgboost.XGBClassifier#1', 'max_depth'): 4\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.32814385597842255\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.19795099494663482\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1\n", + "2020-07-10 11:49:23,792 - INFO - session - New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.6297458681170419\n", + "2020-07-10 11:49:23,796 - INFO - session - Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", + "2020-07-10 11:49:23,955 - INFO - session - Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", + "2020-07-10 11:49:24,191 - INFO - session - Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", + "2020-07-10 11:49:24,403 - INFO - session - Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", + "2020-07-10 11:49:24,546 - INFO - session - Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", + "2020-07-10 11:49:25,544 - INFO - session - Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", + "2020-07-10 11:49:25,698 - INFO - pipeline - New configuration found:\n", + " Template: resample_600s_unstack_normalize_dfs_1d_xgb_classifier \n", + " Hyperparameters: \n", + " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 96\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 36\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 9\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.3256576169027807\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.1061546068995437\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1\n", + "2020-07-10 11:49:25,699 - INFO - session - New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.6306697372853741\n" + ] + }, + { + "data": { + "text/plain": [ + "{'id': '157087395a2643c9ecc4a2b3549a1fc9',\n", + " 'name': 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier',\n", + " 'config': {('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", + " 'max_labels'): 96,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 36,\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 9,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.3256576169027807,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.1061546068995437,\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1},\n", + " 'score': 0.6306697372853741}" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "session.run(iterations=10)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.6306697372853741" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pipeline.cv_score" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "{('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", + " 'max_labels'): 96,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 36,\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 9,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.3256576169027807,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.1061546068995437,\n", + " 
('xgboost.XGBClassifier#1', 'min_child_weight'): 1}" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pipeline.get_hyperparameters()" ] @@ -759,9 +947,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Built 165 features\n", + "Elapsed: 00:37 | Progress: 100%|██████████\n" + ] + } + ], "source": [ "pipeline.fit(train, readings)" ] @@ -777,9 +974,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Elapsed: 00:12 | Progress: 100%|██████████\n" + ] + } + ], "source": [ "predictions = pipeline.predict(test, readings)" ] @@ -793,9 +998,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "0.7307692307692306" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "from sklearn.metrics import f1_score\n", "\n", @@ -820,7 +1036,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ @@ -839,7 +1055,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -855,9 +1071,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Elapsed: 00:12 | Progress: 100%|██████████\n" + ] + }, + { + "data": { + "text/plain": [ + "array([0, 0, 0, 1, 0])" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "predictions = new_pipeline.predict(test, readings)\n", "predictions[0:5]" From 4c017e32da5096488be26c5053acf83b1840362b Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Fri, 10 Jul 2020 11:55:59 +0200 Subject: [PATCH 066/171] Remove macos from github actions testing --- .github/workflows/tests.yml | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 29c71ac..093fa94 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -12,7 +12,7 @@ jobs: strategy: matrix: python-version: [3.5, 3.6, 3.7] - os: [ubuntu-latest, macos-latest] + os: [ubuntu-latest] steps: - uses: actions/checkout@v1 @@ -21,18 +21,9 @@ jobs: with: python-version: ${{ matrix.python-version }} - - if: matrix.os == 'ubuntu-latest' - name: Install graphviz - Ubuntu - run: | - sudo apt-get install pandoc - - - if: matrix.os == 'macos-latest' - name: Install graphviz - MacOS - run: | - brew install pandoc - - name: Install dependencies run: | + sudo apt-get install pandoc python -m pip install --upgrade pip pip install tox tox-gh-actions From bb7e23a1261cddec94795afb79e1e64e28e55c43 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Fri, 10 Jul 2020 12:15:06 +0200 Subject: [PATCH 067/171] Reverse split generation on the scorer --- greenguard/pipeline.py | 11 ++++++----- setup.py | 2 +- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py index 8735e7b..b4c9c4c 100644 --- a/greenguard/pipeline.py +++ b/greenguard/pipeline.py @@ -386,14 
+386,15 @@ def _cross_validate(self, template_splits, hyperparams): def _make_btb_scorer(self, target_times, readings, turbines): splits = {} - for name in self._template_names: - splits[name] = self._generate_splits(name, target_times, readings, turbines) - - del target_times, readings, turbines - gc.collect() def scorer(template_name, config): template_splits = splits.get(template_name) + if template_splits is None: + template_splits = self._generate_splits( + template_name, target_times, readings, turbines) + + splits[template_name] = template_splits + cv_score = self._cross_validate(template_splits, config) if self._is_better(cv_score): _config = '\n'.join(' {}: {}'.format(n, v) for n, v in config.items()) diff --git a/setup.py b/setup.py index c02e9a9..d78848e 100644 --- a/setup.py +++ b/setup.py @@ -51,7 +51,7 @@ # style check 'flake8>=3.7.7', - 'isort>=4.3.4', + 'isort>=4.3.4,<5', # fix style issues 'autoflake>=1.2', From ce24e261207b6ab8def66a10997a48c5d751d19d Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Fri, 10 Jul 2020 12:15:59 +0200 Subject: [PATCH 068/171] Update tutorials link --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 85ec4ee..db84cb8 100644 --- a/README.md +++ b/README.md @@ -281,5 +281,5 @@ f1_score(test_targets, predictions) For more details about **GreenGuard** and all its possibilities and features, please check the [project documentation site](https://signals-dev.github.io/GreenGuard/) -Also do not forget to have a look at the [notebook tutorials]( -https://github.com/signals-dev/GreenGuard/tree/master/notebooks)! +Also do not forget to have a look at the [tutorials]( +https://github.com/signals-dev/GreenGuard/tree/master/tutorials)! From 8323ec16fc09c4c298491ad9c4f0d09778e06d95 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Fri, 10 Jul 2020 12:58:05 +0200 Subject: [PATCH 069/171] Add release notes for v0.2.2 --- HISTORY.md | 10 ++++++++++ Makefile | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index c5a9de0..7e1e8ae 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,15 @@ # History +## 0.2.2 - 2020-07-10 + +### Internam Imrpovements + +* Added github actions. 
+ +### Resolved Issues + +* Issue #27: Cache Splits pre-processed data on disk + ## 0.2.1 - 2020-06-16 With this release we give the possibility to the user to specify more than one template when diff --git a/Makefile b/Makefile index b489087..0ee820a 100644 --- a/Makefile +++ b/Makefile @@ -236,7 +236,7 @@ check-release: check-clean check-master check-history ## Check if the release ca @echo "A new release can be made" .PHONY: release -release: check-release bumpversion-release publish bumpversion-patch +release: check-release bumpversion-release docker-push publish bumpversion-patch .PHONY: release-test release-test: check-release bumpversion-release-test publish-test bumpversion-revert From 8e7867edc3f7c97737fbf72d2fb2d88c8d7c6498 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Fri, 10 Jul 2020 12:58:12 +0200 Subject: [PATCH 070/171] =?UTF-8?q?Bump=20version:=200.2.2.dev0=20?= =?UTF-8?q?=E2=86=92=200.2.2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- greenguard/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/greenguard/__init__.py b/greenguard/__init__.py index c9e61c2..6ae815d 100644 --- a/greenguard/__init__.py +++ b/greenguard/__init__.py @@ -4,7 +4,7 @@ __author__ = """MIT Data To AI Lab""" __email__ = 'dailabmit@gmail.com' -__version__ = '0.2.2.dev0' +__version__ = '0.2.2' import os diff --git a/setup.cfg b/setup.cfg index 7f91cdd..924ad00 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.2.2.dev0 +current_version = 0.2.2 commit = True tag = True parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? diff --git a/setup.py b/setup.py index 9ae475d..cbb9883 100644 --- a/setup.py +++ b/setup.py @@ -104,6 +104,6 @@ test_suite='tests', tests_require=tests_require, url='https://github.com/D3-AI/GreenGuard', - version='0.2.2.dev0', + version='0.2.2', zip_safe=False, ) From 437887858fd4254ad78a0521119bb997509bc0a1 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Fri, 10 Jul 2020 13:36:41 +0200 Subject: [PATCH 071/171] =?UTF-8?q?Bump=20version:=200.2.2=20=E2=86=92=200?= =?UTF-8?q?.2.3.dev0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- greenguard/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/greenguard/__init__.py b/greenguard/__init__.py index 6ae815d..17dc390 100644 --- a/greenguard/__init__.py +++ b/greenguard/__init__.py @@ -4,7 +4,7 @@ __author__ = """MIT Data To AI Lab""" __email__ = 'dailabmit@gmail.com' -__version__ = '0.2.2' +__version__ = '0.2.3.dev0' import os diff --git a/setup.cfg b/setup.cfg index 924ad00..2c808a1 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.2.2 +current_version = 0.2.3.dev0 commit = True tag = True parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? 
diff --git a/setup.py b/setup.py
index cbb9883..0a39dce 100644
--- a/setup.py
+++ b/setup.py
@@ -104,6 +104,6 @@
     test_suite='tests',
     tests_require=tests_require,
     url='https://github.com/D3-AI/GreenGuard',
-    version='0.2.2',
+    version='0.2.3.dev0',
     zip_safe=False,
 )

From cba9b36dc69d4e6a32a94b4be84930189730cfe7 Mon Sep 17 00:00:00 2001
From: Plamen Valentinov Kolev
Date: Fri, 10 Jul 2020 13:47:53 +0200
Subject: [PATCH 072/171] Fix typo

---
 HISTORY.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/HISTORY.md b/HISTORY.md
index 7e1e8ae..d9c599b 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -2,7 +2,7 @@
 
 ## 0.2.2 - 2020-07-10
 
-### Internam Imrpovements
+### Internal Improvements
 
 * Added github actions.
 
From 6212f0320f0350a5cf50b6447d5ad730d4511c2c Mon Sep 17 00:00:00 2001
From: joanvaquer
Date: Wed, 5 Aug 2020 18:50:26 +0200
Subject: [PATCH 073/171] renaming pipelines

---
 ...json => normalize_dfs_xgb_classifier.json} |   0
 ...nstack_144_lstm_timeseries_classifier.json | 119 ------------------
 ...double_144_lstm_timeseries_classifier.json | 119 ------------------
 ...r.json => unstack_dfs_xgb_classifier.json} |   0
 ...ck_double_lstm_timeseries_classifier.json} |   0
 ...> unstack_lstm_timeseries_classifier.json} |   2 +-
 ...unstack_normalize_dfs_xgb_classifier.json} |   0
 7 files changed, 1 insertion(+), 239 deletions(-)
 rename greenguard/pipelines/{resample_600s_normalize_dfs_1d_xgb_classifier.json => normalize_dfs_xgb_classifier.json} (100%)
 delete mode 100644 greenguard/pipelines/resample_600s_unstack_144_lstm_timeseries_classifier.json
 delete mode 100644 greenguard/pipelines/resample_600s_unstack_double_144_lstm_timeseries_classifier.json
 rename greenguard/pipelines/{resample_600s_unstack_dfs_1d_xgb_classifier.json => unstack_dfs_xgb_classifier.json} (100%)
 rename greenguard/pipelines/{resample_3600s_unstack_double_24_lstm_timeseries_classifier.json => unstack_double_lstm_timeseries_classifier.json} (100%)
 rename greenguard/pipelines/{resample_3600s_unstack_24_lstm_timeseries_classifier.json => unstack_lstm_timeseries_classifier.json} (98%)
 rename greenguard/pipelines/{resample_600s_unstack_normalize_dfs_1d_xgb_classifier.json => unstack_normalize_dfs_xgb_classifier.json} (100%)

diff --git a/greenguard/pipelines/resample_600s_normalize_dfs_1d_xgb_classifier.json b/greenguard/pipelines/normalize_dfs_xgb_classifier.json
similarity index 100%
rename from greenguard/pipelines/resample_600s_normalize_dfs_1d_xgb_classifier.json
rename to greenguard/pipelines/normalize_dfs_xgb_classifier.json
diff --git a/greenguard/pipelines/resample_600s_unstack_144_lstm_timeseries_classifier.json b/greenguard/pipelines/resample_600s_unstack_144_lstm_timeseries_classifier.json
deleted file mode 100644
index b54702b..0000000
--- a/greenguard/pipelines/resample_600s_unstack_144_lstm_timeseries_classifier.json
+++ /dev/null
@@ -1,119 +0,0 @@
-{
-    "primitives": [
-        "pandas.DataFrame.resample",
-        "pandas.DataFrame.unstack",
-        "pandas.DataFrame.pop",
-        "pandas.DataFrame.pop",
-        "sklearn.impute.SimpleImputer",
-        "sklearn.preprocessing.MinMaxScaler",
-        "pandas.DataFrame",
-        "pandas.DataFrame.set",
-        "pandas.DataFrame.set",
-        "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences",
-        "keras.Sequential.LSTMTimeSeriesClassifier"
-    ],
-    "init_params": {
-        "pandas.DataFrame.resample#1": {
-            "rule": "600s",
-            "on": "timestamp",
-            "groupby": [
-                "turbine_id",
-                "signal_id"
-            ],
-            "aggregation": "mean",
-            "reset_index": false
-        },
-        "pandas.DataFrame.unstack#1": {
-            "level": "signal_id",
-
"reset_index": true - }, - "pandas.DataFrame.pop#1": { - "item": "turbine_id" - }, - "pandas.DataFrame.pop#2": { - "item": "timestamp" - }, - "sklearn.preprocessing.MinMaxScaler#1": { - "feature_range": [ - -1, - 1 - ] - }, - "pandas.DataFrame#1": { - "index": null, - "columns": null - }, - "pandas.DataFrame.set#1": { - "key": "turbine_id" - }, - "pandas.DataFrame.set#2": { - "key": "timestamp" - }, - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { - "window_size": 144, - "cutoff_time": "cutoff_time", - "time_index": "timestamp" - }, - "keras.Sequential.LSTMTimeSeriesClassifier": { - "epochs": 35, - "verbose": false - } - }, - "input_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - }, - "pandas.DataFrame.pop#1": { - "X": "readings" - }, - "pandas.DataFrame.pop#2": { - "X": "readings" - }, - "sklearn.impute.SimpleImputer#1": { - "X": "readings" - }, - "sklearn.preprocessing.MinMaxScaler#1": { - "X": "readings" - }, - "pandas.DataFrame#1": { - "X": "readings" - }, - "pandas.DataFrame.set#1": { - "X": "readings", - "value": "turbine_id" - }, - "pandas.DataFrame.set#2": { - "X": "readings", - "value": "timestamp" - }, - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { - "timeseries": "readings" - } - }, - "output_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - }, - "pandas.DataFrame.pop#1": { - "item": "turbine_id" - }, - "pandas.DataFrame.pop#2": { - "item": "timestamp" - }, - "sklearn.impute.SimpleImputer#1": { - "X": "readings" - }, - "sklearn.preprocessing.MinMaxScaler#1": { - "X": "readings" - }, - "pandas.DataFrame#1": { - "X": "readings" - } - } -} diff --git a/greenguard/pipelines/resample_600s_unstack_double_144_lstm_timeseries_classifier.json b/greenguard/pipelines/resample_600s_unstack_double_144_lstm_timeseries_classifier.json deleted file mode 100644 index 368dd4d..0000000 --- a/greenguard/pipelines/resample_600s_unstack_double_144_lstm_timeseries_classifier.json +++ /dev/null @@ -1,119 +0,0 @@ -{ - "primitives": [ - "pandas.DataFrame.resample", - "pandas.DataFrame.unstack", - "pandas.DataFrame.pop", - "pandas.DataFrame.pop", - "sklearn.impute.SimpleImputer", - "sklearn.preprocessing.MinMaxScaler", - "pandas.DataFrame", - "pandas.DataFrame.set", - "pandas.DataFrame.set", - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences", - "keras.Sequential.DoubleLSTMTimeSeriesClassifier" - ], - "init_params": { - "pandas.DataFrame.resample#1": { - "rule": "600s", - "on": "timestamp", - "groupby": [ - "turbine_id", - "signal_id" - ], - "aggregation": "mean", - "reset_index": false - }, - "pandas.DataFrame.unstack#1": { - "level": "signal_id", - "reset_index": true - }, - "pandas.DataFrame.pop#1": { - "item": "turbine_id" - }, - "pandas.DataFrame.pop#2": { - "item": "timestamp" - }, - "sklearn.preprocessing.MinMaxScaler#1": { - "feature_range": [ - -1, - 1 - ] - }, - "pandas.DataFrame#1": { - "index": null, - "columns": null - }, - "pandas.DataFrame.set#1": { - "key": "turbine_id" - }, - "pandas.DataFrame.set#2": { - "key": "timestamp" - }, - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { - "window_size": 144, - "cutoff_time": "cutoff_time", - "time_index": "timestamp" - }, - "keras.Sequential.DoubleLSTMTimeSeriesClassifier": { - "epochs": 35, - "verbose": false - } - }, - "input_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - 
"pandas.DataFrame.unstack#1": { - "X": "readings" - }, - "pandas.DataFrame.pop#1": { - "X": "readings" - }, - "pandas.DataFrame.pop#2": { - "X": "readings" - }, - "sklearn.impute.SimpleImputer#1": { - "X": "readings" - }, - "sklearn.preprocessing.MinMaxScaler#1": { - "X": "readings" - }, - "pandas.DataFrame#1": { - "X": "readings" - }, - "pandas.DataFrame.set#1": { - "X": "readings", - "value": "turbine_id" - }, - "pandas.DataFrame.set#2": { - "X": "readings", - "value": "timestamp" - }, - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { - "timeseries": "readings" - } - }, - "output_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - }, - "pandas.DataFrame.pop#1": { - "item": "turbine_id" - }, - "pandas.DataFrame.pop#2": { - "item": "timestamp" - }, - "sklearn.impute.SimpleImputer#1": { - "X": "readings" - }, - "sklearn.preprocessing.MinMaxScaler#1": { - "X": "readings" - }, - "pandas.DataFrame#1": { - "X": "readings" - } - } -} diff --git a/greenguard/pipelines/resample_600s_unstack_dfs_1d_xgb_classifier.json b/greenguard/pipelines/unstack_dfs_xgb_classifier.json similarity index 100% rename from greenguard/pipelines/resample_600s_unstack_dfs_1d_xgb_classifier.json rename to greenguard/pipelines/unstack_dfs_xgb_classifier.json diff --git a/greenguard/pipelines/resample_3600s_unstack_double_24_lstm_timeseries_classifier.json b/greenguard/pipelines/unstack_double_lstm_timeseries_classifier.json similarity index 100% rename from greenguard/pipelines/resample_3600s_unstack_double_24_lstm_timeseries_classifier.json rename to greenguard/pipelines/unstack_double_lstm_timeseries_classifier.json diff --git a/greenguard/pipelines/resample_3600s_unstack_24_lstm_timeseries_classifier.json b/greenguard/pipelines/unstack_lstm_timeseries_classifier.json similarity index 98% rename from greenguard/pipelines/resample_3600s_unstack_24_lstm_timeseries_classifier.json rename to greenguard/pipelines/unstack_lstm_timeseries_classifier.json index 7e494d5..ab9dd99 100644 --- a/greenguard/pipelines/resample_3600s_unstack_24_lstm_timeseries_classifier.json +++ b/greenguard/pipelines/unstack_lstm_timeseries_classifier.json @@ -54,7 +54,7 @@ "cutoff_time": "cutoff_time", "time_index": "timestamp" }, - "keras.Sequential.LSTMTimeSeriesClassifier": { + "keras.Sequential.LSTMTimeSeriesClassifier#1": { "epochs": 35, "verbose": false } diff --git a/greenguard/pipelines/resample_600s_unstack_normalize_dfs_1d_xgb_classifier.json b/greenguard/pipelines/unstack_normalize_dfs_xgb_classifier.json similarity index 100% rename from greenguard/pipelines/resample_600s_unstack_normalize_dfs_1d_xgb_classifier.json rename to greenguard/pipelines/unstack_normalize_dfs_xgb_classifier.json From 720d02e439d71ea714094493b911e35adb14875e Mon Sep 17 00:00:00 2001 From: joanvaquer Date: Wed, 5 Aug 2020 18:52:07 +0200 Subject: [PATCH 074/171] Updating setup, Makefile and README --- Makefile | 2 +- README.md | 16 +++++++--------- setup.py | 2 +- 3 files changed, 9 insertions(+), 11 deletions(-) diff --git a/Makefile b/Makefile index 0ee820a..ea625f3 100644 --- a/Makefile +++ b/Makefile @@ -101,7 +101,7 @@ fix-lint: ## fix lint issues using autoflake, autopep8, and isort .PHONY: test-unit test-unit: ## run tests quickly with the default Python - python -m pytest --basetemp=${ENVTMPDIR} --cov=greenguard + python -m pytest --cov=greenguard .PHONY: test-readme test-readme: ## run the readme snippets diff --git a/README.md b/README.md index 
db84cb8..9c37cb4 100644 --- a/README.md +++ b/README.md @@ -219,20 +219,18 @@ The returned `pipeline` variable will be `list` containing the names of all the available in the GreenGuard system: ``` -['resample_600s_normalize_dfs_1d_xgb_classifier', - 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier', - 'resample_600s_unstack_double_144_lstm_timeseries_classifier', - 'resample_3600s_unstack_24_lstm_timeseries_classifier', - 'resample_3600s_unstack_double_24_lstm_timeseries_classifier', - 'resample_600s_unstack_dfs_1d_xgb_classifier', - 'resample_600s_unstack_144_lstm_timeseries_classifier'] +['unstack_double_lstm_timeseries_classifier', + 'unstack_lstm_timeseries_classifier', + 'unstack_normalize_dfs_xgb_classifier', + 'unstack_dfs_xgb_classifier', + 'normalize_dfs_xgb_classifier'] ``` For the rest of this tutorial, we will select and use the pipeline -`resample_600s_unstack_normalize_dfs_1d_xgb_classifier` as our template. +`normalize_dfs_xgb_classifier` as our template. ```python3 -pipeline_name = 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier' +pipeline_name = 'normalize_dfs_xgb_classifier' ``` ## 3. Fitting the Pipeline diff --git a/setup.py b/setup.py index 0a39dce..4c9c640 100644 --- a/setup.py +++ b/setup.py @@ -18,7 +18,7 @@ install_requires = [ 'Keras>=2.1.6,<2.4', 'mlblocks>=0.3.4,<0.4', - 'mlprimitives>=0.2.4,<0.3', + 'mlprimitives>=0.2.5,<0.3', 'scipy>=1.0.1,<1.4.0', 'baytune>=0.3.9,<0.4', 'numpy>=1.15.4,<1.17', From 465610b08b61193c9adbb4602b2ed7433858e96f Mon Sep 17 00:00:00 2001 From: joanvaquer Date: Wed, 5 Aug 2020 18:53:14 +0200 Subject: [PATCH 075/171] Improving pipeline.py --- greenguard/pipeline.py | 58 +++++++++++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 7 deletions(-) diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py index b4c9c4c..785a747 100644 --- a/greenguard/pipeline.py +++ b/greenguard/pipeline.py @@ -93,6 +93,29 @@ def get_pipelines(pattern='', path=False, unstacked=False): return pipelines +def generate_init_params(template_names, init_params): + """Generate init_params dicts. + + The output will be a dict that contains one entry for each template + with a dict indicating the init_params to use with that template. + """ + if not init_params: + init_params = {} + elif isinstance(init_params, list): + init_params = dict(zip(template_names, init_params)) + + if not any(name in init_params for name in template_names): + return { + name: deepcopy(init_params) + for name in template_names + } + else: + return { + name: deepcopy(init_params.get(name, {})) + for name in template_names + } + + class GreenGuardPipeline(object): """Main Machine Learning component in the GreenGuard project. @@ -132,7 +155,7 @@ class GreenGuardPipeline(object): Template to use. If a ``str`` is given, load the corresponding ``MLPipeline``. Also can be a list combining both. metric (str or function): - Metric to use. If an ``str`` is give it must be one of the metrics + Metric to use. If an ``str`` is given it must be one of the metrics defined in the ``greenguard.metrics.METRICS`` dictionary. cost (bool): Whether the metric is a cost function (the lower the better) or not. 
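As a quick aside, here is a minimal usage sketch of the new `generate_init_params` helper (the template names are hypothetical, and the outputs simply follow the two branches of the function added in the hunk above):

    from greenguard.pipeline import generate_init_params

    names = ['template_a', 'template_b']  # hypothetical template names

    # A dict that mentions no template name is broadcast (deep-copied) to every template:
    generate_init_params(names, {'xgboost.XGBClassifier#1': {'n_estimators': 50}})
    # {'template_a': {'xgboost.XGBClassifier#1': {'n_estimators': 50}},
    #  'template_b': {'xgboost.XGBClassifier#1': {'n_estimators': 50}}}

    # A list is zipped against the template names, so each template gets its own entry:
    generate_init_params(names, [{'epochs': 5}, {'epochs': 10}])
    # {'template_a': {'epochs': 5}, 'template_b': {'epochs': 10}}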
@@ -226,23 +249,44 @@ def _get_templates(self, templates): for template in templates: if isinstance(template, str): template_name = template - template = load_pipeline(template_name) + template = deepcopy(load_pipeline(template_name)) else: template_name = md5(json.dumps(template)).digest() + template_dicts[template_name] = template template_names.append(template_name) return template_names, template_dicts def _generate_init_params(self, init_params): + """Generate init_params dicts. + + The output will be a dict that contains one entry for each template + with a dict indicating the init_params to use with that template. + """ if not init_params: - self._init_params = {} + init_params = {} elif isinstance(init_params, list): - self._init_params = dict(zip(self._template_names, init_params)) - elif any(name in init_params for name in self._template_names): - self._init_params = init_params + init_params = dict(zip(self._template_names, init_params)) + + if not any(name in init_params for name in self._template_names): + self._init_params = { + name: deepcopy(init_params) + for name in self._template_names + } + else: + self._init_params = { + name: deepcopy(init_params.get(name, {})) + for name in self._template_names + } def _generate_preprocessing(self, preprocessing): + """Generate preprocessing dict. + + The preprocessing dict contains one entry for each template and + an integer indicating the number of preprocessing steps for that + template. + """ if isinstance(preprocessing, int): self._preprocessing = {name: preprocessing for name in self._template_names} else: @@ -279,7 +323,7 @@ def __init__(self, templates, metric='accuracy', cost=False, init_params=None, s self.templates = templates self._template_names, self._template_dicts = self._get_templates(templates) self._default_init_params = {} - self._generate_init_params(init_params) + self._init_params = generate_init_params(self._template_names, init_params) for name, template in self._template_dicts.items(): init_params = self._init_params.get(name, self._default_init_params) From f9564a3db36c5db0c5f1982bce014376e155fdda Mon Sep 17 00:00:00 2001 From: joanvaquer Date: Wed, 5 Aug 2020 19:00:50 +0200 Subject: [PATCH 076/171] New tutorial --- tutorials/03_Benchmarking.ipynb | 220 ++++++++++++++++++++++++++++++++ 1 file changed, 220 insertions(+) create mode 100644 tutorials/03_Benchmarking.ipynb diff --git a/tutorials/03_Benchmarking.ipynb b/tutorials/03_Benchmarking.ipynb new file mode 100644 index 0000000..ad3c41d --- /dev/null +++ b/tutorials/03_Benchmarking.ipynb @@ -0,0 +1,220 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import logging;\n", + "\n", + "logging.basicConfig(level=logging.INFO)\n", + "logging.getLogger().setLevel(level=logging.ERROR)\n", + "logging.getLogger('greenguard').setLevel(level=logging.INFO)\n", + "\n", + "import warnings\n", + "warnings.simplefilter(\"ignore\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + }, + { + "data": { + "text/plain": [ + "['unstack_double_lstm_timeseries_classifier',\n", + " 'unstack_lstm_timeseries_classifier']" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from greenguard import get_pipelines\n", + "\n", + "get_pipelines('lstm')" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + 
"metadata": {}, + "outputs": [], + "source": [ + "from greenguard.benchmark import evaluate_templates" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2020-08-05 17:14:08,860 - INFO - greenguard.pipeline - New configuration found:\n", + " Template: unstack_lstm_timeseries_classifier \n", + " Hyperparameters: \n", + " ('sklearn.impute.SimpleImputer#1', 'strategy'): mean\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'lstm_1_units'): 80\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.3\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dense_1_units'): 80\n", + "2020-08-05 17:14:16,974 - INFO - greenguard.pipeline - New configuration found:\n", + " Template: unstack_lstm_timeseries_classifier \n", + " Hyperparameters: \n", + " ('sklearn.impute.SimpleImputer#1', 'strategy'): constant\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'lstm_1_units'): 397\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.38706239055719976\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dense_1_units'): 367\n", + "2020-08-05 17:14:36,898 - INFO - greenguard.pipeline - New configuration found:\n", + " Template: unstack_lstm_timeseries_classifier \n", + " Hyperparameters: \n", + " ('sklearn.impute.SimpleImputer#1', 'strategy'): most_frequent\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'lstm_1_units'): 90\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.7472037016839137\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dense_1_units'): 215\n", + "2020-08-05 17:15:00,145 - INFO - greenguard.pipeline - New configuration found:\n", + " Template: unstack_lstm_timeseries_classifier \n", + " Hyperparameters: \n", + " ('sklearn.impute.SimpleImputer#1', 'strategy'): mean\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'lstm_1_units'): 80\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.3\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dense_1_units'): 80\n", + "2020-08-05 17:15:14,558 - INFO - greenguard.pipeline - New configuration found:\n", + " Template: unstack_lstm_timeseries_classifier \n", + " Hyperparameters: \n", + " ('sklearn.impute.SimpleImputer#1', 'strategy'): most_frequent\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'lstm_1_units'): 245\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.23326913705083852\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dense_1_units'): 425\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
templatewindow_sizeresample_ruledefault_testdefault_cvtuned_cvtuned_teststatus
0unstack_lstm_timeseries_classifier1d1h0.6037740.6249280.6388710.666667OK
1unstack_lstm_timeseries_classifier3d4h0.7083330.6079780.6400480.709677OK
\n", + "
" + ], + "text/plain": [ + " template window_size resample_rule default_test \\\n", + "0 unstack_lstm_timeseries_classifier 1d 1h 0.603774 \n", + "1 unstack_lstm_timeseries_classifier 3d 4h 0.708333 \n", + "\n", + " default_cv tuned_cv tuned_test status \n", + "0 0.624928 0.638871 0.666667 OK \n", + "1 0.607978 0.640048 0.709677 OK " + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "evaluate_templates(\n", + " templates=[\n", + " 'unstack_lstm_timeseries_classifier',\n", + " ],\n", + " window_size_rule=[\n", + " ('1d', '1h'),\n", + " ('3d', '4h'),\n", + " ],\n", + " init_params={\n", + " 'keras.Sequential.LSTMTimeSeriesClassifier#1': {\n", + " 'epochs': 1,\n", + " }\n", + " },\n", + " tuning_iterations=3,\n", + " cv_splits=3,\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 7f974efd8ec715f11c1053299d86aab148540156 Mon Sep 17 00:00:00 2001 From: joanvaquer Date: Wed, 5 Aug 2020 19:01:11 +0200 Subject: [PATCH 077/171] New test --- tests/test_benchmark.py | 58 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 tests/test_benchmark.py diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py new file mode 100644 index 0000000..8a446e0 --- /dev/null +++ b/tests/test_benchmark.py @@ -0,0 +1,58 @@ +"""Tests for `greenguard.benchmark` module.""" +from sklearn.metrics import f1_score + +from greenguard.benchmark import evaluate_templates +from greenguard.demo import load_demo + + +def test_predict(): + # setup + templates = [ + 'unstack_lstm_timeseries_classifier' + ] + + window_size_rule = [ + ('1d', '1h') + ] + + target_times, readings = load_demo() + target_times = target_times.head(10) + readings = readings.head(1000) + + # run + scores_df = evaluate_templates( + target_times=target_times, + readings=readings, + templates=templates, + window_size_rule=window_size_rule, + metric=f1_score, + tuning_iterations=1, + cv_splits=2 + ) + + # assert + expected_columns = [ + 'template', + 'window_size', + 'resample_rule', + 'default_test', + 'default_cv', + 'tuned_cv', + 'tuned_test', + 'status' + ] + + expected_dtypes = [ + 'object', + 'object', + 'object', + 'float64', + 'float64', + 'float64', + 'float64', + 'object' + ] + + assert (scores_df.columns.to_list() == expected_columns) + assert (scores_df.tuned_test.notnull) + assert (scores_df.dtypes.to_list() == expected_dtypes) From 9936f4f928f68be08c402ebd8f3163cc30438edd Mon Sep 17 00:00:00 2001 From: joanvaquer Date: Wed, 5 Aug 2020 19:09:41 +0200 Subject: [PATCH 078/171] benchmark.py --- greenguard/benchmark.py | 278 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 278 insertions(+) create mode 100644 greenguard/benchmark.py diff --git a/greenguard/benchmark.py b/greenguard/benchmark.py new file mode 100644 index 0000000..67e8b37 --- /dev/null +++ b/greenguard/benchmark.py @@ -0,0 +1,278 @@ +import logging +from itertools import product + +import pandas as pd +from sklearn.model_selection import train_test_split + +from greenguard.demo import load_demo +from greenguard.metrics import METRICS +from greenguard.pipeline import GreenGuardPipeline, 
+
+LOGGER = logging.getLogger(__name__)
+
+
+def _generate_init_params(templates, init_params):
+    if not init_params:
+        init_params = {}
+    elif isinstance(init_params, list):
+        init_params = dict(zip(templates, init_params))
+    elif any(name in init_params for name in templates):
+        init_params = init_params
+    else:
+        init_params = {template: init_params for template in templates}
+
+    return init_params
+
+
+def _build_init_params(template, window_size, rule, template_params):
+    if 'dfs' in template:
+        window_size_rule_params = {
+            'pandas.DataFrame.resample#1': {
+                'rule': rule,
+            },
+            'featuretools.dfs.json#1': {
+                'training_window': window_size,
+            }
+        }
+    elif 'lstm' in template:
+        window_size_rule_params = {
+            'pandas.DataFrame.resample#1': {
+                'rule': rule,
+            },
+            'mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1': {
+                'window_size': window_size,
+            }
+        }
+
+    for primitive, params in window_size_rule_params.items():
+        primitive_params = template_params.get(primitive, {})
+        primitive_params.update(params)
+
+    return template_params
+
+
+def _build_init_preprocessing(templates, template, preprocessing):
+    if isinstance(preprocessing, int):
+        return preprocessing
+    elif isinstance(preprocessing, list):
+        preprocessing = dict(zip(templates, preprocessing))
+
+    return preprocessing.get(template, 0)
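Before the scoring functions, a short sketch of what `_build_init_params` does with the window size and resample rule (the template name and values are illustrative). Note that the helper updates the primitive entries in place, so the injected values are only kept when the corresponding primitive keys already exist in `template_params`; the fallback dict created by `.get(primitive, {})` is never stored back:

    template = 'unstack_normalize_dfs_xgb_classifier'
    template_params = {
        'pandas.DataFrame.resample#1': {},
        'featuretools.dfs.json#1': {},
    }
    _build_init_params(template, '1d', '1h', template_params)
    # template_params is now:
    # {'pandas.DataFrame.resample#1': {'rule': '1h'},
    #  'featuretools.dfs.json#1': {'training_window': '1d'}}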
+ """ + scores = dict() + + train, test = train_test_split(target_times, test_size=test_size, random_state=random_state) + + if isinstance(metric, str): + metric, cost = METRICS[metric] + + pipeline = GreenGuardPipeline(template, metric, cost=cost, cv_splits=cv_splits, + init_params=init_params, preprocessing=preprocessing) + + # Computing the default test score + pipeline.fit(train, readings) + predictions = pipeline.predict(test, readings) + + scores['default_test'] = metric(test['target'], predictions) + + # Computing the default cross validation score + session = pipeline.tune(train, readings) + session.run(1) + + scores['default_cv'] = pipeline.cv_score + + # Computing the cross validation score with tuned hyperparameters + session.run(tuning_iterations) + + scores['tuned_cv'] = pipeline.cv_score + + # Computing the test score with tuned hyperparameters + pipeline.fit(train, readings) + predictions = pipeline.predict(test, readings) + + scores['tuned_test'] = metric(test['target'], predictions) + + return scores + + +def evaluate_templates(templates, window_size_rule, metric='f1', tuning_iterations=50, + init_params=None, target_times=None, readings=None, preprocessing=0, + cost=False, test_size=0.25, cv_splits=3, random_state=0, output_path=None): + """Execute the benchmark process and optionally store the result as a ``CSV``. + + Args: + templates (list): + List of templates to try. + window_size_rule (list): + List of tupples (int, str or Timedelta object). + metric (function or str). + Metric to use. If an ``str`` is give it must be one of the metrics + defined in the ``greenguard.metrics.METRICS`` dictionary. + tuning_iterations (int): + Number of iterations to be used. + target_times (DataFrame): + Contains the specefication problem that we are solving, which has three columns: + + * turbine_id: Unique identifier of the turbine which this label corresponds to. + * cutoff_time: Time associated with this target. + * target: The value that we want to predict. This can either be a numerical value + or a categorical label. This column can also be skipped when preparing + data that will be used only to make predictions and not to fit any + pipeline. + + readings (DataFrame): + Contains the signal data from different sensors, with the following columns: + + * turbine_id: Unique identifier of the turbine which this reading comes from. + * signal_id: Unique identifier of the signal which this reading comes from. + * timestamp (datetime): Time where the reading took place, as a datetime. + * value (float): Numeric value of this reading. + + preprocessing (int, list or dict): + Type of preprocessing to be used. + cost (bool): + Wheter the metric is a cost function (the lower the better) or not. + test_size (float): + Percentage of the data set to be used for the test. + cv_splits (int): + Amount of splits to create. + random_state (int): + Random number of train_test split. + output_path (str): + Path where to save the benchmark report. + + Returns: + pandas.DataFrame or None: + If ``output_path`` is ``None`` it will return a ``pandas.DataFrame`` object, + else it will dump the results in the specified ``output_path``. + + Example: + >>> from sklearn.metrics import f1_score + >>> templates = [ + ... 'normalize_dfs_xgb_classifier', + ... 'unstack_lstm_timeseries_classifier' + ... ] + >>> window_size_rule = [ + ... ('30d','12h'), + ... ('7d','4h') + ... ] + >>> preprocessing = [0, 1] + >>> scores_df = evaluate_templates( + ... templates=templates, + ... 
window_size_rule=window_size_rule, + ... metric=f1_score, + ... tuning_iterations=5, + ... preprocessing=preprocessing, + ... cost=False, + ... test_size=0.25, + ... cv_splits=3, + ... random_state=0 + ... ) + >>> scores_df + template window_size resample_rule default_test default_cv tuned_cv tuned_test status + 0 unstack_lstm_timeseries_classifier 30d 12h 0.720000 0.593634 0.627883 0.775510 OK + 1 unstack_lstm_timeseries_classifier 7d 4h 0.723404 0.597440 0.610766 0.745098 OK + 2 normalize_dfs_xgb_classifier 30d 12h 0.581818 0.619698 0.637123 0.596491 OK + 3 normalize_dfs_xgb_classifier 7d 4h 0.581818 0.619698 0.650367 0.603774 OK + + """ # noqa + + if readings is None and target_times is None: + target_times, readings = load_demo() + + init_params = generate_init_params(templates, init_params) + + scores_list = [] + for template, window_rule in product(templates, window_size_rule): + window_size, rule = window_rule + + scores = dict() + scores['template'] = template + scores['window_size'] = window_size + scores['resample_rule'] = rule + + try: + template_params = init_params[template] + template_params = _build_init_params(template, window_size, rule, template_params) + init_preprocessing = _build_init_preprocessing(templates, template, preprocessing) + + result = evaluate_template( + template=template, + target_times=target_times, + readings=readings, + metric=metric, + tuning_iterations=tuning_iterations, + preprocessing=init_preprocessing, + init_params=template_params, + cost=cost, + test_size=test_size, + cv_splits=cv_splits, + random_state=random_state) + + scores.update(result) + scores['status'] = 'OK' + + except Exception: + scores['status'] = 'ERRORED' + LOGGER.exception('Could not score template %s ', template) + + scores_list.append(scores) + + results = pd.DataFrame.from_records(scores_list) + results = results.reindex(['template', 'window_size', 'resample_rule', 'default_test', + 'default_cv', 'tuned_cv', 'tuned_test', 'status'], axis=1) + + if output_path: + LOGGER.info('Saving benchmark report to %s', output_path) + results.to_csv(output_path) + else: + return results From d9b6222f1358f56283a44591b0ca5bf94838dcaf Mon Sep 17 00:00:00 2001 From: joanvaquer Date: Thu, 6 Aug 2020 16:44:29 +0200 Subject: [PATCH 079/171] Removing py3.5 --- .github/workflows/tests.yml | 2 +- .travis.yml | 1 - setup.py | 3 +-- tox.ini | 14 +++++--------- 4 files changed, 7 insertions(+), 13 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 093fa94..135d2a5 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -11,7 +11,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: [3.5, 3.6, 3.7] + python-version: [3.6, 3.7] os: [ubuntu-latest] steps: diff --git a/.travis.yml b/.travis.yml index 4cefe52..641dff9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,6 @@ language: python python: - 3.7 - 3.6 - - 3.5 # Command to install dependencies install: diff --git a/setup.py b/setup.py index 4c9c640..6cb3298 100644 --- a/setup.py +++ b/setup.py @@ -77,7 +77,6 @@ 'License :: OSI Approved :: MIT License', 'Natural Language :: English', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', ], @@ -99,7 +98,7 @@ long_description_content_type='text/markdown', name='greenguard', packages=find_packages(include=['greenguard', 'greenguard.*']), - python_requires='>=3.5,<3.8', + python_requires='>=3.6,<3.8', 
setup_requires=setup_requires, test_suite='tests', tests_require=tests_require, diff --git a/tox.ini b/tox.ini index 31724c5..91af938 100644 --- a/tox.ini +++ b/tox.ini @@ -1,29 +1,25 @@ [tox] -envlist = py{35,36,37}, test-devel +envlist = py{36,37}, test-devel [travis] python = 3.7: py37, test-devel 3.6: py36 - 3.5: py35 [gh-actions] python = 3.7: py37, test-devel 3.6: py36 - 3.5: py35 [testenv] passenv = CI TRAVIS TRAVIS_* -skipsdist = true -skip_install = true -commands_pre = - /usr/bin/env pip install .[test] +skipsdist = false +skip_install = false +extras = test commands = /usr/bin/env make test [testenv:test-devel] -commands_pre = - /usr/bin/env pip install .[dev] +extras = dev commands = /usr/bin/env make test-devel From 73c3c9e0d75be7256edd2988e99790844acdd2c8 Mon Sep 17 00:00:00 2001 From: joanvaquer Date: Thu, 6 Aug 2020 16:44:59 +0200 Subject: [PATCH 080/171] Reducing readings --- tests/test_benchmark.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index 8a446e0..4dfe576 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -17,7 +17,7 @@ def test_predict(): target_times, readings = load_demo() target_times = target_times.head(10) - readings = readings.head(1000) + readings = readings.head(100) # run scores_df = evaluate_templates( From a71c6cfab585235300220881527ff8320e34d72f Mon Sep 17 00:00:00 2001 From: joanvaquer Date: Thu, 6 Aug 2020 18:19:44 +0200 Subject: [PATCH 081/171] Fixing pipeline --- .../pipelines/unstack_double_lstm_timeseries_classifier.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/greenguard/pipelines/unstack_double_lstm_timeseries_classifier.json b/greenguard/pipelines/unstack_double_lstm_timeseries_classifier.json index 7f4e8a6..dede502 100644 --- a/greenguard/pipelines/unstack_double_lstm_timeseries_classifier.json +++ b/greenguard/pipelines/unstack_double_lstm_timeseries_classifier.json @@ -54,7 +54,7 @@ "cutoff_time": "cutoff_time", "time_index": "timestamp" }, - "keras.Sequential.DoubleLSTMTimeSeriesClassifier": { + "keras.Sequential.DoubleLSTMTimeSeriesClassifier#1": { "epochs": 35, "verbose": false } From 13715fa4ed6e0202de43837fa7131b6d02efdd3c Mon Sep 17 00:00:00 2001 From: joanvaquer Date: Thu, 6 Aug 2020 18:22:07 +0200 Subject: [PATCH 082/171] Function generate_preprocessing --- greenguard/benchmark.py | 26 ++--------------- greenguard/pipeline.py | 62 ++++++++++++++--------------------------- 2 files changed, 23 insertions(+), 65 deletions(-) diff --git a/greenguard/benchmark.py b/greenguard/benchmark.py index 67e8b37..bd1cdb3 100644 --- a/greenguard/benchmark.py +++ b/greenguard/benchmark.py @@ -6,24 +6,11 @@ from greenguard.demo import load_demo from greenguard.metrics import METRICS -from greenguard.pipeline import GreenGuardPipeline, generate_init_params +from greenguard.pipeline import GreenGuardPipeline, generate_init_params, generate_preprocessing LOGGER = logging.getLogger(__name__) -def _generate_init_params(templates, init_params): - if not init_params: - init_params = {} - elif isinstance(init_params, list): - init_params = dict(zip(templates, init_params)) - elif any(name in init_params for name in templates): - init_params = init_params - else: - init_params = {template: init_params for template in templates} - - return init_params - - def _build_init_params(template, window_size, rule, template_params): if 'dfs' in template: window_size_rule_params = { @@ -51,15 +38,6 @@ def _build_init_params(template, window_size, rule, 
template_params): return template_params -def _build_init_preprocessing(templates, template, preprocessing): - if isinstance(preprocessing, int): - return preprocessing - elif isinstance(preprocessing, list): - preprocessing = dict(zip(templates, preprocessing)) - - return preprocessing.get(template, 0) - - def evaluate_template(template, target_times, readings, metric='f1', tuning_iterations=50, preprocessing=0, init_params=None, cost=False, test_size=0.25, cv_splits=3, random_state=0): @@ -243,7 +221,7 @@ def evaluate_templates(templates, window_size_rule, metric='f1', tuning_iteratio try: template_params = init_params[template] template_params = _build_init_params(template, window_size, rule, template_params) - init_preprocessing = _build_init_preprocessing(templates, template, preprocessing) + init_preprocessing = generate_preprocessing(templates, template, preprocessing) result = evaluate_template( template=template, diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py index 785a747..335b67b 100644 --- a/greenguard/pipeline.py +++ b/greenguard/pipeline.py @@ -116,6 +116,26 @@ def generate_init_params(template_names, init_params): } +def generate_preprocessing(templates_names, template, preprocessing): + """Generate preprocessing dict. + + The preprocessing dict contains one entry for each template and + an integer indicating the number of preprocessing steps for that + template. + """ + if isinstance(preprocessing, int): + preprocessing = {template: preprocessing for template in templates_names} + else: + if isinstance(preprocessing, list): + preprocessing = dict(zip(templates_names, preprocessing)) + + preprocessing = { + template: preprocessing.get(template, 0) + for name in templates_names + } + return preprocessing + + class GreenGuardPipeline(object): """Main Machine Learning component in the GreenGuard project. @@ -258,46 +278,6 @@ def _get_templates(self, templates): return template_names, template_dicts - def _generate_init_params(self, init_params): - """Generate init_params dicts. - - The output will be a dict that contains one entry for each template - with a dict indicating the init_params to use with that template. - """ - if not init_params: - init_params = {} - elif isinstance(init_params, list): - init_params = dict(zip(self._template_names, init_params)) - - if not any(name in init_params for name in self._template_names): - self._init_params = { - name: deepcopy(init_params) - for name in self._template_names - } - else: - self._init_params = { - name: deepcopy(init_params.get(name, {})) - for name in self._template_names - } - - def _generate_preprocessing(self, preprocessing): - """Generate preprocessing dict. - - The preprocessing dict contains one entry for each template and - an integer indicating the number of preprocessing steps for that - template. 
- """ - if isinstance(preprocessing, int): - self._preprocessing = {name: preprocessing for name in self._template_names} - else: - if isinstance(preprocessing, list): - preprocessing = dict(zip(self._template_names, preprocessing)) - - self._preprocessing = { - name: preprocessing.get(name, 0) - for name in self._template_names - } - def _build_pipeline(self): self._pipeline = MLPipeline(self.template) @@ -330,7 +310,7 @@ def __init__(self, templates, metric='accuracy', cost=False, init_params=None, s template_params = template.setdefault('init_params', {}) self._update_params(template_params, init_params) - self._generate_preprocessing(preprocessing) + generate_preprocessing(self._template_names, self.templates, preprocessing) self._set_template(self._template_names[0]) self._hyperparameters = dict() self._build_pipeline() From 25c9b8cc72b57d574f67fe3ca1cf8d0bb28412c9 Mon Sep 17 00:00:00 2001 From: joanvaquer Date: Fri, 7 Aug 2020 14:18:20 +0200 Subject: [PATCH 083/171] Fix error with lstm pipelines --- greenguard/pipeline.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py index 335b67b..db84050 100644 --- a/greenguard/pipeline.py +++ b/greenguard/pipeline.py @@ -131,7 +131,7 @@ def generate_preprocessing(templates_names, template, preprocessing): preprocessing = { template: preprocessing.get(template, 0) - for name in templates_names + for template in templates_names } return preprocessing @@ -310,7 +310,8 @@ def __init__(self, templates, metric='accuracy', cost=False, init_params=None, s template_params = template.setdefault('init_params', {}) self._update_params(template_params, init_params) - generate_preprocessing(self._template_names, self.templates, preprocessing) + self._preprocessing = generate_preprocessing( + self._template_names, self.templates, preprocessing) self._set_template(self._template_names[0]) self._hyperparameters = dict() self._build_pipeline() From 35b2e1171c1e8ff4fc36c77cc258dd5aa23a1922 Mon Sep 17 00:00:00 2001 From: joanvaquer Date: Fri, 7 Aug 2020 14:20:29 +0200 Subject: [PATCH 084/171] Fix lint --- greenguard/pipeline.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py index db84050..c38bc16 100644 --- a/greenguard/pipeline.py +++ b/greenguard/pipeline.py @@ -311,7 +311,7 @@ def __init__(self, templates, metric='accuracy', cost=False, init_params=None, s self._update_params(template_params, init_params) self._preprocessing = generate_preprocessing( - self._template_names, self.templates, preprocessing) + self._template_names, self.templates, preprocessing) self._set_template(self._template_names[0]) self._hyperparameters = dict() self._build_pipeline() From eea55c53bc7e5c22206b16a216be7adc0d8c3414 Mon Sep 17 00:00:00 2001 From: joanvaquer Date: Fri, 7 Aug 2020 15:20:18 +0200 Subject: [PATCH 085/171] Updating notebook tutorial --- .../01_GreenGuard_Machine_Learning.ipynb | 320 +++++++++--------- 1 file changed, 159 insertions(+), 161 deletions(-) diff --git a/tutorials/01_GreenGuard_Machine_Learning.ipynb b/tutorials/01_GreenGuard_Machine_Learning.ipynb index e17f0b2..7fab764 100644 --- a/tutorials/01_GreenGuard_Machine_Learning.ipynb +++ b/tutorials/01_GreenGuard_Machine_Learning.ipynb @@ -36,7 +36,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ @@ -62,7 +62,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 14, "metadata": 
{}, "outputs": [], "source": [ @@ -84,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -157,7 +157,7 @@ "4 T001 2013-01-16 0" ] }, - "execution_count": 3, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -168,7 +168,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -177,7 +177,7 @@ "(353, 3)" ] }, - "execution_count": 4, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -188,7 +188,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -200,7 +200,7 @@ "dtype: object" ] }, - "execution_count": 5, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -211,7 +211,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -290,7 +290,7 @@ "4 T001 S05 2013-01-10 273.0" ] }, - "execution_count": 6, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -301,7 +301,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -310,7 +310,7 @@ "(1313540, 4)" ] }, - "execution_count": 7, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -321,7 +321,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -334,7 +334,7 @@ "dtype: object" ] }, - "execution_count": 8, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -383,7 +383,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -408,22 +408,20 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['resample_600s_normalize_dfs_1d_xgb_classifier',\n", - " 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier',\n", - " 'resample_600s_unstack_double_144_lstm_timeseries_classifier',\n", - " 'resample_3600s_unstack_24_lstm_timeseries_classifier',\n", - " 'resample_3600s_unstack_double_24_lstm_timeseries_classifier',\n", - " 'resample_600s_unstack_dfs_1d_xgb_classifier',\n", - " 'resample_600s_unstack_144_lstm_timeseries_classifier']" + "['normalize_dfs_xgb_classifier',\n", + " 'unstack_normalize_dfs_xgb_classifier',\n", + " 'unstack_dfs_xgb_classifier',\n", + " 'unstack_lstm_timeseries_classifier',\n", + " 'unstack_double_lstm_timeseries_classifier']" ] }, - "execution_count": 10, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -443,18 +441,18 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['resample_600s_normalize_dfs_1d_xgb_classifier',\n", - " 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier',\n", - " 'resample_600s_unstack_dfs_1d_xgb_classifier']" + "['normalize_dfs_xgb_classifier',\n", + " 'unstack_normalize_dfs_xgb_classifier',\n", + " 'unstack_dfs_xgb_classifier']" ] }, - "execution_count": 11, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -473,18 +471,18 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'resample_600s_normalize_dfs_1d_xgb_classifier': 
'/home/xals/Projects/MIT/GreenGuard/greenguard/pipelines/resample_600s_normalize_dfs_1d_xgb_classifier.json',\n", - " 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier': '/home/xals/Projects/MIT/GreenGuard/greenguard/pipelines/resample_600s_unstack_normalize_dfs_1d_xgb_classifier.json',\n", - " 'resample_600s_unstack_dfs_1d_xgb_classifier': '/home/xals/Projects/MIT/GreenGuard/greenguard/pipelines/resample_600s_unstack_dfs_1d_xgb_classifier.json'}" + "{'normalize_dfs_xgb_classifier': '/home/usuario/Projects/GreenGuard/greenguard/pipelines/normalize_dfs_xgb_classifier.json',\n", + " 'unstack_normalize_dfs_xgb_classifier': '/home/usuario/Projects/GreenGuard/greenguard/pipelines/unstack_normalize_dfs_xgb_classifier.json',\n", + " 'unstack_dfs_xgb_classifier': '/home/usuario/Projects/GreenGuard/greenguard/pipelines/unstack_dfs_xgb_classifier.json'}" ] }, - "execution_count": 12, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -498,10 +496,10 @@ "metadata": {}, "source": [ "For the rest of this tutorial, we will select and use the templates\n", - "`resample_600s_unstack_normalize_dfs_1d_xgb_classifier` and\n", - "`resample_600s_normalize_dfs_1d_xgb_classifier`.\n", + "`unstack_normalize_dfs_xgb_classifier` and\n", + "`normalize_dfs_xgb_classifier`.\n", "\n", - "The `resample_600s_unstack_normalize_dfs_1d_xgb_classifier` template contains the following steps:\n", + "The `unstack_normalize_dfs_xgb_classifier` template contains the following steps:\n", "\n", "- Resample the data using a 10 minute average aggregation\n", "- Unstack the data by signal, so each signal is in a different column\n", @@ -509,19 +507,19 @@ "- Use DFS on the readings based on the target_times cutoff times using a 1d window size\n", "- Apply an XGBoost Classifier\n", "\n", - "And the `resample_600s_normalize_dfs_1d_xgb_classifier` template contains the above steps but without\n", + "And the `normalize_dfs_xgb_classifier` template contains the above steps but without\n", "unstacking the data by signal." 
] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "templates = [\n", - " 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier', \n", - " 'resample_600s_normalize_dfs_1d_xgb_classifier'\n", + " 'unstack_normalize_dfs_xgb_classifier', \n", + " 'normalize_dfs_xgb_classifier'\n", "]" ] }, @@ -548,7 +546,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 26, "metadata": {}, "outputs": [], "source": [ @@ -570,7 +568,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ @@ -587,14 +585,14 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "2020-07-10 11:39:49,290 - INFO - session - Obtaining default configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n" + "INFO:btb.session:Obtaining default configuration for unstack_normalize_dfs_xgb_classifier\n" ] }, { @@ -602,22 +600,22 @@ "output_type": "stream", "text": [ "Built 165 features\n", - "Elapsed: 00:32 | Progress: 100%|██████████\n", - "Elapsed: 00:16 | Progress: 100%|██████████\n", + "Elapsed: 00:34 | Progress: 100%|██████████\n", + "Elapsed: 00:18 | Progress: 100%|██████████\n", "Built 165 features\n", - "Elapsed: 00:32 | Progress: 100%|██████████\n", - "Elapsed: 00:16 | Progress: 100%|██████████\n", + "Elapsed: 00:36 | Progress: 100%|██████████\n", + "Elapsed: 00:17 | Progress: 100%|██████████\n", "Built 165 features\n", - "Elapsed: 00:32 | Progress: 100%|██████████\n", - "Elapsed: 00:15 | Progress: 100%|██████████\n" + "Elapsed: 00:38 | Progress: 100%|██████████\n", + "Elapsed: 00:17 | Progress: 100%|██████████\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "2020-07-10 11:42:19,561 - INFO - pipeline - New configuration found:\n", - " Template: resample_600s_unstack_normalize_dfs_1d_xgb_classifier \n", + "INFO:greenguard.pipeline:New configuration found:\n", + " Template: unstack_normalize_dfs_xgb_classifier \n", " Hyperparameters: \n", " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 0\n", " ('xgboost.XGBClassifier#1', 'n_estimators'): 100\n", @@ -625,8 +623,8 @@ " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.1\n", " ('xgboost.XGBClassifier#1', 'gamma'): 0.0\n", " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1\n", - "2020-07-10 11:42:19,563 - INFO - session - New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.604136604136604\n", - "2020-07-10 11:42:19,565 - INFO - session - Obtaining default configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n" + "INFO:btb.session:New optimal found: unstack_normalize_dfs_xgb_classifier - 0.605187908496732\n", + "INFO:btb.session:Obtaining default configuration for normalize_dfs_xgb_classifier\n" ] }, { @@ -634,51 +632,51 @@ "output_type": "stream", "text": [ "Built 99 features\n", - "Elapsed: 01:28 | Progress: 100%|██████████\n", - "Elapsed: 00:45 | Progress: 100%|██████████\n", + "Elapsed: 01:44 | Progress: 100%|██████████\n", + "Elapsed: 00:52 | Progress: 100%|██████████\n", "Built 99 features\n", - "Elapsed: 01:29 | Progress: 100%|██████████\n", - "Elapsed: 00:47 | Progress: 100%|██████████\n", + "Elapsed: 01:38 | Progress: 100%|██████████\n", + "Elapsed: 00:52 | Progress: 100%|██████████\n", "Built 99 features\n", - "Elapsed: 01:32 | Progress: 100%|██████████\n", - "Elapsed: 00:48 | Progress: 100%|██████████\n" + 
"Elapsed: 01:39 | Progress: 100%|██████████\n", + "Elapsed: 00:49 | Progress: 100%|██████████\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "2020-07-10 11:49:21,971 - INFO - session - Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", - "2020-07-10 11:49:22,446 - INFO - session - Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", - "2020-07-10 11:49:22,682 - INFO - session - Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", - "2020-07-10 11:49:22,862 - INFO - pipeline - New configuration found:\n", - " Template: resample_600s_unstack_normalize_dfs_1d_xgb_classifier \n", + "INFO:btb.session:Generating new proposal configuration for unstack_normalize_dfs_xgb_classifier\n", + "INFO:greenguard.pipeline:New configuration found:\n", + " Template: unstack_normalize_dfs_xgb_classifier \n", " Hyperparameters: \n", - " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 16\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 82\n", + " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 20\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 234\n", " ('xgboost.XGBClassifier#1', 'max_depth'): 3\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.39699298238763425\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.06238180737748478\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.23028782510751677\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.9403975339570728\n", " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1\n", - "2020-07-10 11:49:22,864 - INFO - session - New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.6110894266631971\n" + "INFO:btb.session:New optimal found: unstack_normalize_dfs_xgb_classifier - 0.6106037764640573\n", + "INFO:btb.session:Generating new proposal configuration for normalize_dfs_xgb_classifier\n", + "INFO:btb.session:Generating new proposal configuration for unstack_normalize_dfs_xgb_classifier\n" ] }, { "data": { "text/plain": [ - "{'id': '6cbe94178d761b5c263dc2f7ab1f8205',\n", - " 'name': 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier',\n", + "{'id': '28d8ebbde404a0e501262a652c4d9aa5',\n", + " 'name': 'unstack_normalize_dfs_xgb_classifier',\n", " 'config': {('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 16,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 82,\n", + " 'max_labels'): 20,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 234,\n", " ('xgboost.XGBClassifier#1', 'max_depth'): 3,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.39699298238763425,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.06238180737748478,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.23028782510751677,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.9403975339570728,\n", " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1},\n", - " 'score': 0.6110894266631971}" + " 'score': 0.6106037764640573}" ] }, - "execution_count": 16, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -697,25 +695,25 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'id': '6cbe94178d761b5c263dc2f7ab1f8205',\n", - " 'name': 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier',\n", + "{'id': '28d8ebbde404a0e501262a652c4d9aa5',\n", + " 'name': 
'unstack_normalize_dfs_xgb_classifier',\n", " 'config': {('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 16,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 82,\n", + " 'max_labels'): 20,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 234,\n", " ('xgboost.XGBClassifier#1', 'max_depth'): 3,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.39699298238763425,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.06238180737748478,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.23028782510751677,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.9403975339570728,\n", " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1},\n", - " 'score': 0.6110894266631971}" + " 'score': 0.6106037764640573}" ] }, - "execution_count": 17, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -733,22 +731,22 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 16,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 82,\n", + " 'max_labels'): 20,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 234,\n", " ('xgboost.XGBClassifier#1', 'max_depth'): 3,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.39699298238763425,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.06238180737748478,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.23028782510751677,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.9403975339570728,\n", " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1}" ] }, - "execution_count": 18, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -766,16 +764,16 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'resample_600s_unstack_normalize_dfs_1d_xgb_classifier'" + "'unstack_normalize_dfs_xgb_classifier'" ] }, - "execution_count": 19, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -794,16 +792,16 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.6110894266631971" + "0.6106037764640573" ] }, - "execution_count": 20, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -823,61 +821,61 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 33, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "2020-07-10 11:49:22,952 - INFO - session - Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", - "2020-07-10 11:49:23,246 - INFO - session - Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", - "2020-07-10 11:49:23,464 - INFO - session - Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", - "2020-07-10 11:49:23,668 - INFO - session - Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", - "2020-07-10 11:49:23,791 - INFO - pipeline - New configuration found:\n", - " Template: resample_600s_unstack_normalize_dfs_1d_xgb_classifier \n", + "INFO:btb.session:Generating new proposal configuration for normalize_dfs_xgb_classifier\n", + "INFO:btb.session:Generating new proposal configuration for unstack_normalize_dfs_xgb_classifier\n", + "INFO:btb.session:Generating new 
proposal configuration for normalize_dfs_xgb_classifier\n", + "INFO:btb.session:Generating new proposal configuration for unstack_normalize_dfs_xgb_classifier\n", + "INFO:greenguard.pipeline:New configuration found:\n", + " Template: unstack_normalize_dfs_xgb_classifier \n", " Hyperparameters: \n", " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 80\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 31\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 4\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.32814385597842255\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.19795099494663482\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1\n", - "2020-07-10 11:49:23,792 - INFO - session - New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.6297458681170419\n", - "2020-07-10 11:49:23,796 - INFO - session - Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", - "2020-07-10 11:49:23,955 - INFO - session - Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", - "2020-07-10 11:49:24,191 - INFO - session - Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", - "2020-07-10 11:49:24,403 - INFO - session - Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", - "2020-07-10 11:49:24,546 - INFO - session - Generating new proposal configuration for resample_600s_normalize_dfs_1d_xgb_classifier\n", - "2020-07-10 11:49:25,544 - INFO - session - Generating new proposal configuration for resample_600s_unstack_normalize_dfs_1d_xgb_classifier\n", - "2020-07-10 11:49:25,698 - INFO - pipeline - New configuration found:\n", - " Template: resample_600s_unstack_normalize_dfs_1d_xgb_classifier \n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 32\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 10\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.11814847201162682\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.9589332448610124\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 6\n", + "INFO:btb.session:New optimal found: unstack_normalize_dfs_xgb_classifier - 0.640497737556561\n", + "INFO:btb.session:Generating new proposal configuration for normalize_dfs_xgb_classifier\n", + "INFO:btb.session:Generating new proposal configuration for unstack_normalize_dfs_xgb_classifier\n", + "INFO:greenguard.pipeline:New configuration found:\n", + " Template: unstack_normalize_dfs_xgb_classifier \n", " Hyperparameters: \n", - " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 96\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 36\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 9\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.3256576169027807\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.1061546068995437\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1\n", - "2020-07-10 11:49:25,699 - INFO - session - New optimal found: resample_600s_unstack_normalize_dfs_1d_xgb_classifier - 0.6306697372853741\n" + " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 98\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 34\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 3\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.3652063328881058\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.8627183599656656\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 6\n", + "INFO:btb.session:New optimal found: 
unstack_normalize_dfs_xgb_classifier - 0.6592605156037993\n", + "INFO:btb.session:Generating new proposal configuration for normalize_dfs_xgb_classifier\n", + "INFO:btb.session:Generating new proposal configuration for unstack_normalize_dfs_xgb_classifier\n", + "INFO:btb.session:Generating new proposal configuration for normalize_dfs_xgb_classifier\n", + "INFO:btb.session:Generating new proposal configuration for unstack_normalize_dfs_xgb_classifier\n" ] }, { "data": { "text/plain": [ - "{'id': '157087395a2643c9ecc4a2b3549a1fc9',\n", - " 'name': 'resample_600s_unstack_normalize_dfs_1d_xgb_classifier',\n", + "{'id': 'f6b410d303a1cfeafdcfe0dbcf330767',\n", + " 'name': 'unstack_normalize_dfs_xgb_classifier',\n", " 'config': {('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 96,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 36,\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 9,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.3256576169027807,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.1061546068995437,\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1},\n", - " 'score': 0.6306697372853741}" + " 'max_labels'): 98,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 34,\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 3,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.3652063328881058,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.8627183599656656,\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 6},\n", + " 'score': 0.6592605156037993}" ] }, - "execution_count": 21, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } @@ -888,16 +886,16 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.6306697372853741" + "0.6592605156037993" ] }, - "execution_count": 22, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -908,22 +906,22 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 96,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 36,\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 9,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.3256576169027807,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.1061546068995437,\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1}" + " 'max_labels'): 98,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 34,\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 3,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.3652063328881058,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.8627183599656656,\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 6}" ] }, - "execution_count": 23, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -947,7 +945,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 36, "metadata": {}, "outputs": [ { @@ -955,7 +953,7 @@ "output_type": "stream", "text": [ "Built 165 features\n", - "Elapsed: 00:37 | Progress: 100%|██████████\n" + "Elapsed: 00:39 | Progress: 100%|██████████\n" ] } ], @@ -974,14 +972,14 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Elapsed: 00:12 | Progress: 100%|██████████\n" + "Elapsed: 00:14 | Progress: 100%|██████████\n" ] } ], @@ 
-998,16 +996,16 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 38,
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "0.7307692307692306"
+       "0.693877551020408"
       ]
      },
-     "execution_count": 26,
+     "execution_count": 38,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -1036,7 +1034,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 27,
+   "execution_count": 39,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1055,7 +1053,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 28,
+   "execution_count": 40,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -1071,14 +1069,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 29,
+   "execution_count": 41,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Elapsed: 00:12 | Progress: 100%|██████████\n"
+      "Elapsed: 00:14 | Progress: 100%|██████████\n"
      ]
     },
     {
@@ -1087,7 +1085,7 @@
      "array([0, 0, 0, 1, 0])"
     ]
    },
-   "execution_count": 29,
+   "execution_count": 41,
    "metadata": {},
    "output_type": "execute_result"
    }

From 69b23373ac80fc52b9b451b2adf58865751a79fb Mon Sep 17 00:00:00 2001
From: joanvaquer
Date: Fri, 7 Aug 2020 16:39:37 +0200
Subject: [PATCH 086/171] Updating docstring

---
 greenguard/benchmark.py | 17 ++++++++++-------
 greenguard/pipeline.py  | 35 ++++++++++++++++++++++++-----------
 2 files changed, 34 insertions(+), 18 deletions(-)

diff --git a/greenguard/benchmark.py b/greenguard/benchmark.py
index bd1cdb3..7b6fdc5 100644
--- a/greenguard/benchmark.py
+++ b/greenguard/benchmark.py
@@ -46,9 +46,6 @@ def evaluate_template(template, target_times, readings, metric='f1', tuning_iter
     Args:
         template (str):
             Given template to evaluate.
-        metric (function or str):
-            Metric to use. If a ``str`` is given it must be one of the metrics
-            defined in the ``greenguard.metrics.METRICS`` dictionary.
         target_times (DataFrame):
            Contains the specification of the problem that we are solving, which has
            three columns:
@@ -67,10 +64,13 @@
           * timestamp (datetime): Time where the reading took place, as a datetime.
           * value (float): Numeric value of this reading.
+        metric (function or str):
+            Metric to use. If a ``str`` is given it must be one of the metrics
+            defined in the ``greenguard.metrics.METRICS`` dictionary.
         tuning_iterations (int):
             Number of iterations to be used.
         preprocessing (int, list or dict):
-            Type of preprocessing to be used.
+            Number of preprocessing steps to be used.
         init_params (list):
             Initialization parameters for the pipeline.
         cost (bool):
            Whether the metric is a cost function (the lower the better) or not.
@@ -137,6 +137,8 @@ def evaluate_templates(templates, window_size_rule, metric='f1', tuning_iteratio
         defined in the ``greenguard.metrics.METRICS`` dictionary.
     tuning_iterations (int):
         Number of iterations to be used.
+    init_params (dict):
+        Initialization parameters for the pipelines.
     target_times (DataFrame):
         Contains the specification of the problem that we are solving, which has
        three columns:
@@ -156,7 +158,7 @@
        * value (float): Numeric value of this reading.
     preprocessing (int, list or dict):
-        Type of preprocessing to be used.
+        Number of preprocessing steps to be used.
     cost (bool):
         Whether the metric is a cost function (the lower the better) or not.
test_size (float): @@ -208,6 +210,7 @@ def evaluate_templates(templates, window_size_rule, metric='f1', tuning_iteratio target_times, readings = load_demo() init_params = generate_init_params(templates, init_params) + preprocessing = generate_preprocessing(templates, preprocessing) scores_list = [] for template, window_rule in product(templates, window_size_rule): @@ -221,7 +224,7 @@ def evaluate_templates(templates, window_size_rule, metric='f1', tuning_iteratio try: template_params = init_params[template] template_params = _build_init_params(template, window_size, rule, template_params) - init_preprocessing = generate_preprocessing(templates, template, preprocessing) + template_preprocessing = preprocessing[template] result = evaluate_template( template=template, @@ -229,7 +232,7 @@ def evaluate_templates(templates, window_size_rule, metric='f1', tuning_iteratio readings=readings, metric=metric, tuning_iterations=tuning_iterations, - preprocessing=init_preprocessing, + preprocessing=template_preprocessing, init_params=template_params, cost=cost, test_size=test_size, diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py index c38bc16..a46c2c6 100644 --- a/greenguard/pipeline.py +++ b/greenguard/pipeline.py @@ -96,8 +96,15 @@ def get_pipelines(pattern='', path=False, unstacked=False): def generate_init_params(template_names, init_params): """Generate init_params dicts. - The output will be a dict that contains one entry for each template - with a dict indicating the init_params to use with that template. + Args: + template_names (list): + List of templates. + init_params (list or dict): + Initialization parameters for the templates. + + Returns: + Dict that contains one entry for each template with a dict indicating + the init_params to use with that template. """ if not init_params: init_params = {} @@ -116,22 +123,29 @@ def generate_init_params(template_names, init_params): } -def generate_preprocessing(templates_names, template, preprocessing): +def generate_preprocessing(templates_names, preprocessing): """Generate preprocessing dict. - The preprocessing dict contains one entry for each template and - an integer indicating the number of preprocessing steps for that - template. + Args: + template_names (list): + List of templates. + preprocessing (int, list or dict): + Number of preprocessing steps to be used. + + Returns: + preprocessing (dict): + Contains one entry for each template and an integer indicating the + number of preprocessing steps for that template. 
""" if isinstance(preprocessing, int): - preprocessing = {template: preprocessing for template in templates_names} + preprocessing = {name: preprocessing for name in templates_names} else: if isinstance(preprocessing, list): preprocessing = dict(zip(templates_names, preprocessing)) preprocessing = { - template: preprocessing.get(template, 0) - for template in templates_names + name: preprocessing.get(name, 0) + for name in templates_names } return preprocessing @@ -310,8 +324,7 @@ def __init__(self, templates, metric='accuracy', cost=False, init_params=None, s template_params = template.setdefault('init_params', {}) self._update_params(template_params, init_params) - self._preprocessing = generate_preprocessing( - self._template_names, self.templates, preprocessing) + self._preprocessing = generate_preprocessing(self._template_names, preprocessing) self._set_template(self._template_names[0]) self._hyperparameters = dict() self._build_pipeline() From 386eed3a342a18ff29bffb8dfd2c601636dd9540 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Tue, 11 Aug 2020 03:05:35 +0200 Subject: [PATCH 087/171] Update docs. --- greenguard/benchmark.py | 34 +++- tutorials/03_Benchmarking.ipynb | 293 ++++++++++++++++++++++++-------- 2 files changed, 243 insertions(+), 84 deletions(-) diff --git a/greenguard/benchmark.py b/greenguard/benchmark.py index 7b6fdc5..c0984db 100644 --- a/greenguard/benchmark.py +++ b/greenguard/benchmark.py @@ -39,8 +39,8 @@ def _build_init_params(template, window_size, rule, template_params): def evaluate_template(template, target_times, readings, metric='f1', tuning_iterations=50, - preprocessing=0, init_params=None, cost=False, test_size=0.25, cv_splits=3, - random_state=0): + preprocessing=0, init_params=None, cost=False, test_size=0.25, + cv_splits=3, random_state=0, cache_path=None): """Returns the scores for a given template. Args: @@ -81,6 +81,9 @@ def evaluate_template(template, target_times, readings, metric='f1', tuning_iter Amount of splits to create. random_state (int): Random number of train_test split. + cache_path (str): + If given, cache the generated cross validation splits in this folder. + Defatuls to ``None``. Returns: scores (dict): @@ -93,8 +96,15 @@ def evaluate_template(template, target_times, readings, metric='f1', tuning_iter if isinstance(metric, str): metric, cost = METRICS[metric] - pipeline = GreenGuardPipeline(template, metric, cost=cost, cv_splits=cv_splits, - init_params=init_params, preprocessing=preprocessing) + pipeline = GreenGuardPipeline( + template, + metric, + cost=cost, + cv_splits=cv_splits, + init_params=init_params, + preprocessing=preprocessing, + cache_path=cache_path + ) # Computing the default test score pipeline.fit(train, readings) @@ -122,9 +132,10 @@ def evaluate_template(template, target_times, readings, metric='f1', tuning_iter return scores -def evaluate_templates(templates, window_size_rule, metric='f1', tuning_iterations=50, - init_params=None, target_times=None, readings=None, preprocessing=0, - cost=False, test_size=0.25, cv_splits=3, random_state=0, output_path=None): +def evaluate_templates(templates, window_size_rule, metric='f1', + tuning_iterations=50, init_params=None, target_times=None, + readings=None, preprocessing=0, cost=False, test_size=0.25, + cv_splits=3, random_state=0, cache_path=None, output_path=None): """Execute the benchmark process and optionally store the result as a ``CSV``. 
From 386eed3a342a18ff29bffb8dfd2c601636dd9540 Mon Sep 17 00:00:00 2001
From: Plamen Valentinov Kolev
Date: Tue, 11 Aug 2020 03:05:35 +0200
Subject: [PATCH 087/171] Update docs.

---
 greenguard/benchmark.py         |  34 +++-
 tutorials/03_Benchmarking.ipynb | 293 ++++++++++++++++++++++++--------
 2 files changed, 243 insertions(+), 84 deletions(-)

diff --git a/greenguard/benchmark.py b/greenguard/benchmark.py
index 7b6fdc5..c0984db 100644
--- a/greenguard/benchmark.py
+++ b/greenguard/benchmark.py
@@ -39,8 +39,8 @@ def _build_init_params(template, window_size, rule, template_params):
 
 
 def evaluate_template(template, target_times, readings, metric='f1', tuning_iterations=50,
-                      preprocessing=0, init_params=None, cost=False, test_size=0.25, cv_splits=3,
-                      random_state=0):
+                      preprocessing=0, init_params=None, cost=False, test_size=0.25,
+                      cv_splits=3, random_state=0, cache_path=None):
     """Returns the scores for a given template.
 
     Args:
@@ -81,6 +81,9 @@ def evaluate_template(template, target_times, readings, metric='f1', tuning_iter
             Amount of splits to create.
         random_state (int):
            Random state for the train_test split.
+        cache_path (str):
+            If given, cache the generated cross validation splits in this folder.
+            Defaults to ``None``.
 
     Returns:
         scores (dict):
@@ -93,8 +96,15 @@
     if isinstance(metric, str):
         metric, cost = METRICS[metric]
 
-    pipeline = GreenGuardPipeline(template, metric, cost=cost, cv_splits=cv_splits,
-                                  init_params=init_params, preprocessing=preprocessing)
+    pipeline = GreenGuardPipeline(
+        template,
+        metric,
+        cost=cost,
+        cv_splits=cv_splits,
+        init_params=init_params,
+        preprocessing=preprocessing,
+        cache_path=cache_path
+    )
 
     # Computing the default test score
     pipeline.fit(train, readings)
@@ -122,9 +132,10 @@
     return scores
 
 
-def evaluate_templates(templates, window_size_rule, metric='f1', tuning_iterations=50,
-                       init_params=None, target_times=None, readings=None, preprocessing=0,
-                       cost=False, test_size=0.25, cv_splits=3, random_state=0, output_path=None):
+def evaluate_templates(templates, window_size_rule, metric='f1',
+                       tuning_iterations=50, init_params=None, target_times=None,
+                       readings=None, preprocessing=0, cost=False, test_size=0.25,
+                       cv_splits=3, random_state=0, cache_path=None, output_path=None):
     """Execute the benchmark process and optionally store the result as a ``CSV``.
 
     Args:
@@ -132,7 +143,7 @@ def evaluate_templates(templates, window_size_rule, metric='f1', tuning_iteratio
         List of templates to try.
     window_size_rule (list):
        List of tuples (int, str or Timedelta object).
-    metric (function or str).
+    metric (function or str):
         Metric to use. If a ``str`` is given it must be one of the metrics
         defined in the ``greenguard.metrics.METRICS`` dictionary.
     tuning_iterations (int):
@@ -169,6 +180,9 @@ def evaluate_templates(templates, window_size_rule, metric='f1', tuning_iteratio
         Random state for the train_test split.
     output_path (str):
         Path where to save the benchmark report.
+    cache_path (str):
+        If given, cache the generated cross validation splits in this folder.
+        Defaults to ``None``.
 
     Returns:
         pandas.DataFrame or None:
@@ -237,7 +251,9 @@ def evaluate_templates(templates, window_size_rule, metric='f1', tuning_iteratio
                     cost=cost,
                     test_size=test_size,
                     cv_splits=cv_splits,
-                    random_state=random_state)
+                    random_state=random_state,
+                    cache_path=cache_path
+                )
 
                 scores.update(result)
                 scores['status'] = 'OK'
diff --git a/tutorials/03_Benchmarking.ipynb b/tutorials/03_Benchmarking.ipynb
index ad3c41d..ee765a5 100644
--- a/tutorials/03_Benchmarking.ipynb
+++ b/tutorials/03_Benchmarking.ipynb
@@ -1,5 +1,22 @@
 {
  "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Benchmarking"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 0. Set up the logging\n",
+    "\n",
+    "This step sets up logging in our environment to increase our visibility over\n",
+    "the steps that GreenGuard performs."
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": 1,
@@ -17,47 +34,61 @@
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 2,
+   "cell_type": "markdown",
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Using TensorFlow backend.\n"
-     ]
-    },
-    {
-     "data": {
-      "text/plain": [
-       "['unstack_double_lstm_timeseries_classifier',\n",
-       " 'unstack_lstm_timeseries_classifier']"
-      ]
-     },
-     "execution_count": 2,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
   "source": [
-    "from greenguard import get_pipelines\n",
    "\n",
-    "get_pipelines('lstm')"
+    "## Running the Benchmarking\n",
+    "\n",
+    "The user API for the GreenGuard Benchmarking is the `greenguard.benchmark.evaluate_templates` function.\n",
+    "\n",
+    "The `evaluate_templates` function accepts the following arguments:\n",
+    "* `templates (list)`: List of templates to try.\n",
+    "* `window_size_rule (list)`: List of tuples (int, str or Timedelta object).\n",
+    "* `metric (function or str)`: Metric to use. If a ``str`` is given it must be one of the metrics defined in the `greenguard.metrics.METRICS` dictionary.\n",
+    "* `tuning_iterations (int)`: Number of iterations to be used.\n",
+    "* `init_params (dict)`: Initialization parameters for the pipelines.\n",
+    "* `target_times (DataFrame)`: Contains the specification of the problem that we are solving, which has three columns:\n",
+    "  * `turbine_id`: Unique identifier of the turbine which this label corresponds to.\n",
+    "  * `cutoff_time`: Time associated with this target.\n",
+    "  * `target`: The value that we want to predict. This can either be a numerical value\n",
+    "    or a categorical label.
This column can also be skipped when preparing\n",
+    "    data that will be used only to make predictions and not to fit any\n",
+    "    pipeline.\n",
+    "* `readings (DataFrame)`: Contains the signal data from different sensors, with the following columns:\n",
+    "  * `turbine_id`: Unique identifier of the turbine which this reading comes from.\n",
+    "  * `signal_id`: Unique identifier of the signal which this reading comes from.\n",
+    "  * `timestamp (datetime)`: Time where the reading took place, as a datetime.\n",
+    "  * `value (float)`: Numeric value of this reading.\n",
+    "* `preprocessing (int, list or dict)`: Number of preprocessing steps to be used.\n",
+    "* `cost (bool)`: Whether the metric is a cost function (the lower the better) or not.\n",
+    "* `test_size (float)`: Percentage of the data set to be used for the test.\n",
+    "* `cv_splits (int)`: Amount of splits to create.\n",
+    "* `random_state (int)`: Random state for the train_test split.\n",
+    "* `output_path (str)`: Path where to save the benchmark report.\n",
+    "* `cache_path (str)`: If given, cache the generated cross validation splits in this folder. Defaults to ``None``."
   ]
  },
 {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "templates = ['unstack_lstm_timeseries_classifier', 'normalize_dfs_xgb_classifier']\n",
    "window_size_rule = [('1d', '1h'), ('3d', '4h')]\n",
    "init_params = {\n",
    "    'unstack_lstm_timeseries_classifier': {\n",
    "        'keras.Sequential.LSTMTimeSeriesClassifier#1': {\n",
    "            'epochs': 1,\n",
    "        }\n",
    "    }\n",
    "}\n"
   ]
  },
 {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
-      "2020-08-05 17:14:08,860 - INFO - greenguard.pipeline - New configuration found:\n",
+      "Using TensorFlow backend.\n",
+      "INFO:greenguard.pipeline:New configuration found:\n",
       "    Template: unstack_lstm_timeseries_classifier \n",
       "    Hyperparameters: \n",
('sklearn.impute.SimpleImputer#1', 'strategy'): mean\n", " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'lstm_1_units'): 80\n", " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.3\n", " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dense_1_units'): 80\n", - "2020-08-05 17:15:14,558 - INFO - greenguard.pipeline - New configuration found:\n", + "INFO:greenguard.pipeline:New configuration found:\n", " Template: unstack_lstm_timeseries_classifier \n", " Hyperparameters: \n", - " ('sklearn.impute.SimpleImputer#1', 'strategy'): most_frequent\n", - " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'lstm_1_units'): 245\n", - " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.23326913705083852\n", - " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dense_1_units'): 425\n" + " ('sklearn.impute.SimpleImputer#1', 'strategy'): median\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'lstm_1_units'): 353\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.4718077136146996\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dense_1_units'): 151\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Built 99 features\n", + "Elapsed: 02:58 | Progress: 100%|██████████\n", + "Elapsed: 00:58 | Progress: 100%|██████████\n", + "Built 99 features\n", + "Elapsed: 01:54 | Progress: 100%|██████████\n", + "Elapsed: 01:08 | Progress: 100%|██████████\n", + "Built 99 features\n", + "Elapsed: 02:20 | Progress: 100%|██████████\n", + "Elapsed: 01:09 | Progress: 100%|██████████\n", + "Built 99 features\n", + "Elapsed: 02:16 | Progress: 100%|██████████\n", + "Elapsed: 01:07 | Progress: 100%|██████████\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "INFO:greenguard.pipeline:New configuration found:\n", + " Template: normalize_dfs_xgb_classifier \n", + " Hyperparameters: \n", + " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 0\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 100\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 3\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.1\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.0\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1\n", + "INFO:greenguard.pipeline:New configuration found:\n", + " Template: normalize_dfs_xgb_classifier \n", + " Hyperparameters: \n", + " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 18\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 920\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 10\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.02731362750079913\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.46258174821600884\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 3\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Built 99 features\n", + "Elapsed: 03:18 | Progress: 100%|██████████\n", + "Elapsed: 01:03 | Progress: 100%|██████████\n", + "Built 99 features\n", + "Elapsed: 03:15 | Progress: 100%|██████████\n", + "Elapsed: 01:06 | Progress: 100%|██████████\n", + "Built 99 features\n", + "Elapsed: 02:05 | Progress: 100%|██████████\n", + "Elapsed: 01:10 | Progress: 100%|██████████\n", + "Built 99 features\n", + "Elapsed: 01:51 | Progress: 100%|██████████\n", + "Elapsed: 00:54 | Progress: 100%|██████████\n", + "Built 99 features\n", + "Elapsed: 01:51 | Progress: 100%|██████████\n", + "Elapsed: 00:58 | Progress: 100%|██████████\n" + ] + }, + { + "name": "stderr", + "output_type": 
"stream", + "text": [ + "INFO:greenguard.pipeline:New configuration found:\n", + " Template: normalize_dfs_xgb_classifier \n", + " Hyperparameters: \n", + " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 0\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 100\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 3\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.1\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.0\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1\n", + "INFO:greenguard.pipeline:New configuration found:\n", + " Template: normalize_dfs_xgb_classifier \n", + " Hyperparameters: \n", + " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 7\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 348\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 3\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.5272082810065426\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.04014402178038856\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 2\n" ] }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Built 99 features\n", + "Elapsed: 02:42 | Progress: 100%|██████████\n", + "Elapsed: 01:00 | Progress: 100%|██████████\n" + ] + } + ], + "source": [ + "from greenguard.benchmark import evaluate_templates\n", + "\n", + "results = evaluate_templates(\n", + " templates=templates,\n", + " window_size_rule=window_size_rule,\n", + " init_params=init_params,\n", + " tuning_iterations=3,\n", + " cv_splits=3,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ { "data": { "text/html": [ @@ -140,9 +272,9 @@ " unstack_lstm_timeseries_classifier\n", " 1d\n", " 1h\n", - " 0.603774\n", - " 0.624928\n", - " 0.638871\n", + " 0.711864\n", + " 0.646437\n", + " 0.646437\n", " 0.666667\n", " OK\n", " \n", @@ -151,24 +283,50 @@ " unstack_lstm_timeseries_classifier\n", " 3d\n", " 4h\n", - " 0.708333\n", - " 0.607978\n", - " 0.640048\n", + " 0.703704\n", + " 0.577295\n", + " 0.616052\n", " 0.709677\n", " OK\n", " \n", + " \n", + " 2\n", + " normalize_dfs_xgb_classifier\n", + " 1d\n", + " 1h\n", + " 0.581818\n", + " 0.619698\n", + " 0.646750\n", + " 0.631579\n", + " OK\n", + " \n", + " \n", + " 3\n", + " normalize_dfs_xgb_classifier\n", + " 3d\n", + " 4h\n", + " 0.581818\n", + " 0.619698\n", + " 0.637957\n", + " 0.642857\n", + " OK\n", + " \n", " \n", "\n", "" ], "text/plain": [ " template window_size resample_rule default_test \\\n", - "0 unstack_lstm_timeseries_classifier 1d 1h 0.603774 \n", - "1 unstack_lstm_timeseries_classifier 3d 4h 0.708333 \n", + "0 unstack_lstm_timeseries_classifier 1d 1h 0.711864 \n", + "1 unstack_lstm_timeseries_classifier 3d 4h 0.703704 \n", + "2 normalize_dfs_xgb_classifier 1d 1h 0.581818 \n", + "3 normalize_dfs_xgb_classifier 3d 4h 0.581818 \n", "\n", " default_cv tuned_cv tuned_test status \n", - "0 0.624928 0.638871 0.666667 OK \n", - "1 0.607978 0.640048 0.709677 OK " + "0 0.646437 0.646437 0.666667 OK \n", + "1 0.577295 0.616052 0.709677 OK \n", + "2 0.619698 0.646750 0.631579 OK \n", + "3 0.619698 0.637957 0.642857 OK " ] }, "execution_count": 4, @@ -177,22 +335,7 @@ } ], "source": [ - "evaluate_templates(\n", - " templates=[\n", - " 'unstack_lstm_timeseries_classifier',\n", - " ],\n", - " window_size_rule=[\n", - " ('1d', '1h'),\n", - " ('3d', '4h'),\n", - " ],\n", - " init_params={\n", - " 'keras.Sequential.LSTMTimeSeriesClassifier#1': {\n", - " 'epochs': 1,\n", - " }\n", - " },\n", - " tuning_iterations=3,\n", - " cv_splits=3,\n", - 
")" + "results" ] } ], @@ -212,7 +355,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.6.11" } }, "nbformat": 4, From 5e92532bd044b79f74c369ad8e31f3c8dda00f42 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Tue, 11 Aug 2020 03:16:24 +0200 Subject: [PATCH 088/171] Add release notes for v0.2.3 --- HISTORY.md | 4 ++++ README.md | 1 + docker/Dockerfile | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index d9c599b..0607be3 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,9 @@ # History +## 0.2.3 - 2020-08-10 + +* Added benchmarking module. + ## 0.2.2 - 2020-07-10 ### Internal Imrpovements diff --git a/README.md b/README.md index 9c37cb4..e585050 100644 --- a/README.md +++ b/README.md @@ -15,6 +15,7 @@ AutoML for Renewable Energy Industries. [![PyPI Shield](https://img.shields.io/pypi/v/greenguard.svg)](https://pypi.python.org/pypi/greenguard) [![Travis CI Shield](https://travis-ci.org/signals-dev/GreenGuard.svg?branch=master)](https://travis-ci.org/signals-dev/GreenGuard) [![Downloads](https://pepy.tech/badge/greenguard)](https://pepy.tech/project/greenguard) +[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/signals-dev/GreenGuard/master?filepath=tutorials) diff --git a/docker/Dockerfile b/docker/Dockerfile index 0917f4c..a086f2b 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -6,7 +6,7 @@ EXPOSE 8888 RUN mkdir /app COPY setup.py /app COPY greenguard /app/greenguard -COPY notebooks /app/notebooks +COPY tutorials /app/tutorials RUN pip install -e /app jupyter WORKDIR /app From 942fa67b3775e9b597d263030d1d6cdc1b644337 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Tue, 11 Aug 2020 03:19:45 +0200 Subject: [PATCH 089/171] =?UTF-8?q?Bump=20version:=200.2.3.dev0=20?= =?UTF-8?q?=E2=86=92=200.2.3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- greenguard/__init__.py | 2 +- setup.cfg | 3 ++- setup.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/greenguard/__init__.py b/greenguard/__init__.py index 17dc390..0247a00 100644 --- a/greenguard/__init__.py +++ b/greenguard/__init__.py @@ -4,7 +4,7 @@ __author__ = """MIT Data To AI Lab""" __email__ = 'dailabmit@gmail.com' -__version__ = '0.2.3.dev0' +__version__ = '0.2.3' import os diff --git a/setup.cfg b/setup.cfg index 2c808a1..040280a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.2.3.dev0 +current_version = 0.2.3 commit = True tag = True parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? 
@@ -45,3 +45,4 @@ test = pytest
 
 [tool:pytest]
 collect_ignore = ['setup.py']
+
diff --git a/setup.py b/setup.py
index 6cb3298..183a4ef 100644
--- a/setup.py
+++ b/setup.py
@@ -103,6 +103,6 @@
     test_suite='tests',
     tests_require=tests_require,
     url='https://github.com/D3-AI/GreenGuard',
-    version='0.2.3.dev0',
+    version='0.2.3',
     zip_safe=False,
 )

From 79287059d3fb785bea1b5508415db0ecb9385f9d Mon Sep 17 00:00:00 2001
From: Plamen Valentinov Kolev
Date: Tue, 11 Aug 2020 09:51:40 +0200
Subject: [PATCH 090/171] =?UTF-8?q?Bump=20version:=200.2.3=20=E2=86=92=200?=
 =?UTF-8?q?.2.4.dev0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 greenguard/__init__.py | 2 +-
 setup.cfg              | 2 +-
 setup.py               | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/greenguard/__init__.py b/greenguard/__init__.py
index 0247a00..20eb561 100644
--- a/greenguard/__init__.py
+++ b/greenguard/__init__.py
@@ -4,7 +4,7 @@
 
 __author__ = """MIT Data To AI Lab"""
 __email__ = 'dailabmit@gmail.com'
-__version__ = '0.2.3'
+__version__ = '0.2.4.dev0'
 
 import os
 
diff --git a/setup.cfg b/setup.cfg
index 040280a..44d3a12 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.2.3
+current_version = 0.2.4.dev0
 commit = True
 tag = True
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?
 
diff --git a/setup.py b/setup.py
index 183a4ef..8ffd589 100644
--- a/setup.py
+++ b/setup.py
@@ -103,6 +103,6 @@
     test_suite='tests',
     tests_require=tests_require,
     url='https://github.com/D3-AI/GreenGuard',
-    version='0.2.3',
+    version='0.2.4.dev0',
     zip_safe=False,
 )

From ae334569c1061a6284431abd8b46594106d38020 Mon Sep 17 00:00:00 2001
From: Plamen Valentinov Kolev
Date: Tue, 15 Sep 2020 15:03:36 +0200
Subject: [PATCH 091/171] Created make_problems function to automatically
 generate problems into pickle files.

---
 greenguard/benchmark.py | 82 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 82 insertions(+)

diff --git a/greenguard/benchmark.py b/greenguard/benchmark.py
index c0984db..0d8ea30 100644
--- a/greenguard/benchmark.py
+++ b/greenguard/benchmark.py
@@ -1,10 +1,15 @@
 import logging
+import os
+import pickle
 from itertools import product
 
 import pandas as pd
+from dask.distributed import Client, LocalCluster
 from sklearn.model_selection import train_test_split
+from tqdm import tqdm
 
 from greenguard.demo import load_demo
+from greenguard.loaders import CSVLoader
 from greenguard.metrics import METRICS
 from greenguard.pipeline import GreenGuardPipeline, generate_init_params, generate_preprocessing
 
@@ -273,3 +278,80 @@ def evaluate_templates(templates, window_size_rule, metric='f1',
         results.to_csv(output_path)
     else:
         return results
+
+
+def make_problem(target_times_paths, readings_path, window_size_resample_rule, output_path,
+                 signals=None, aggregation='mean', datetime_fmt='%m/%d/%y %H:%M:%S',
+                 filename_fmt='%Y-%m.csv', unstack=False, parse_dates=['cutoff_time']):
+    """
+    Args:
+        target_times_paths (list):
+            List of paths to CSVs that contain target times.
+        readings_path (str):
+            Path to the folder where readings in raw CSV format can be found.
+        window_size_resample_rule (list):
+            List of tuples (int, str or Timedelta object).
+        output_path (str):
+            Path to save the generated problems.
+        signals (str):
+            List of signal names or csv file that has a `signal_id` column to use as the signal
+            names list.
+        aggregation (str):
+            Aggregation to apply to the readings.
+        datetime_fmt (str):
+            Date format used by the column timestamp for the readings.
Defaults
+            to `%m/%d/%y %H:%M:%S`.
+        filename_fmt (str):
+            Filename format. Defaults to `%Y-%m.csv`.
+        unstack (bool):
+            Whether to unstack the resampled data, generating one column per signal. Only used
+            when resampling. Defaults to `False`.
+    """
+    cluster = LocalCluster(n_workers=16, dashboard_address=':9792')
+    client = Client(cluster)
+
+    for tt_path in tqdm(target_times_paths):
+        if parse_dates:
+            parse_dates = [parse_dates] if not isinstance(parse_dates, list) else parse_dates
+            target_times = pd.read_csv(tt_path, parse_dates=parse_dates)
+        else:
+            target_times = pd.read_csv(tt_path)
+
+        for window_size, rule in window_size_resample_rule:
+            csv_loader = CSVLoader(
+                readings_path,
+                rule=rule,
+                aggregation=aggregation,
+                datetime_fmt=datetime_fmt,
+            )
+
+            new_target_times, readings = csv_loader.load(
+                target_times,
+                window_size=window_size,
+                signals=signals,
+            )
+
+            problem_name = 'problem_{}_{}.pkl'.format(window_size, rule)
+            output_pickle = os.path.join(output_path, problem_name)
+
+            with open(output_pickle, 'wb') as pickle_file:
+                pickle.dump((new_target_times, readings), pickle_file)
+
+    client.shutdown()
+
+
+def benchmark():
+    """
+    Args:
+        pipelines (list):
+        problem_paths (list):
+        target_times_paths (list):
+        readings_path (list):
+        window_size_resample_rule (list):
+            List of tuples (int, str or Timedelta object).
+        tuning_iterations (int):
+        cv_splits (int):
+        cache_path (str):
+        output_path (str):
+    """
+    pass

From a1244ba992c8767de8c5c507ff38a4dc17eb4973 Mon Sep 17 00:00:00 2001
From: Plamen Valentinov Kolev
Date: Tue, 15 Sep 2020 15:59:30 +0200
Subject: [PATCH 092/171] Return generated problems paths

---
 greenguard/benchmark.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/greenguard/benchmark.py b/greenguard/benchmark.py
index 0d8ea30..35c9117 100644
--- a/greenguard/benchmark.py
+++ b/greenguard/benchmark.py
@@ -310,6 +310,8 @@ def make_problem(target_times_paths, readings_path, window_size_resample_rule, o
     cluster = LocalCluster(n_workers=16, dashboard_address=':9792')
     client = Client(cluster)
 
+    generated_problems = list()
+
     for tt_path in tqdm(target_times_paths):
         if parse_dates:
@@ -322,6 +324,7 @@ def make_problem(target_times_paths, readings_path, window_size_resample_rule, o
                 readings_path,
                 rule=rule,
                 aggregation=aggregation,
+                unstack=unstack,
                 datetime_fmt=datetime_fmt,
             )
 
@@ -332,13 +335,16 @@ def make_problem(target_times_paths, readings_path, window_size_resample_rule, o
             )
 
             problem_name = 'problem_{}_{}.pkl'.format(window_size, rule)
-            output_pickle = os.path.join(output_path, problem_name)
-
-            with open(output_pickle, 'wb') as pickle_file:
+            output_pickle_path = os.path.join(output_path, problem_name)
+            with open(output_pickle_path, 'wb') as pickle_file:
                 pickle.dump((new_target_times, readings), pickle_file)
 
+            generated_problems.append(output_pickle_path)
+
     client.shutdown()
 
+    return generated_problems
+
 
 def benchmark():
     """
     Args:
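
With the paths now returned, the generated problems can be consumed directly.
A minimal sketch of the round trip at this point in the series, assuming
placeholder file locations and an already existing `problems` output folder
(the function joins paths into it but does not create it):

```python
import pickle

from greenguard.benchmark import make_problem

paths = make_problem(
    target_times_paths=['target_times.csv'],   # placeholder CSV with a cutoff_time column
    readings_path='readings',                  # placeholder raw readings folder
    window_size_resample_rule=[('3d', '4h')],
    output_path='problems',
)

# At this stage each pickle stores the (target_times, readings) pair
# for one window size and resample rule combination.
with open(paths[0], 'rb') as pickle_file:
    target_times, readings = pickle.load(pickle_file)
```
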
From b05aa355008facada3796c4534e4cab647cfc11b Mon Sep 17 00:00:00 2001
From: Plamen Valentinov Kolev
Date: Tue, 15 Sep 2020 18:50:38 +0200
Subject: [PATCH 093/171] Benchmark draft

---
 greenguard/benchmark.py | 169 +++++++++++++++++++++++++++++++++-------
 greenguard/utils.py     |   8 ++
 2 files changed, 148 insertions(+), 29 deletions(-)

diff --git a/greenguard/benchmark.py b/greenguard/benchmark.py
index 35c9117..281039a 100644
--- a/greenguard/benchmark.py
+++ b/greenguard/benchmark.py
@@ -12,10 +12,17 @@ from greenguard.loaders import CSVLoader
 from greenguard.metrics import METRICS
 from greenguard.pipeline import GreenGuardPipeline, generate_init_params, generate_preprocessing
+from greenguard.utils import as_list
 
 LOGGER = logging.getLogger(__name__)
 
 
+def _create_client(n_workers=16, dashboard_address=':9792'):
+    cluster = LocalCluster(n_workers=n_workers, dashboard_address=dashboard_address)
+    client = Client(cluster)
+    return client
+
+
 def _build_init_params(template, window_size, rule, template_params):
     if 'dfs' in template:
         window_size_rule_params = {
@@ -287,9 +287,29 @@ def evaluate_templates(templates, window_size_rule, metric='f1',
     return results
 
 
+def _generate_target_times_readings(target_times, readings_path, window_size, rule,
+                                    signals, aggregation, datetime_fmt='%m/%d/%y %H:%M:%S',
+                                    filename_fmt='%Y-%m.csv'):
+    """
+    Returns:
+        pandas.DataFrame:
+            Table of readings for the target times, including the columns ``turbine_id``,
+            ``signal_id``, ``timestamp`` and ``value``.
+    """
+    csv_loader = CSVLoader(
+        readings_path,
+        rule=rule,
+        aggregation=aggregation,
+        datetime_fmt=datetime_fmt,
+        filename_fmt=filename_fmt,
+    )
+
+    return csv_loader.load(target_times, window_size=window_size, signals=signals)
+
+
 def make_problem(target_times_paths, readings_path, window_size_resample_rule, output_path,
                  signals=None, aggregation='mean', datetime_fmt='%m/%d/%y %H:%M:%S',
-                 filename_fmt='%Y-%m.csv', unstack=False, parse_dates=['cutoff_time']):
+                 filename_fmt='%Y-%m.csv', n_workers=16, dashboard_address=':9792'):
     """
     Args:
         target_times_paths (list):
@@ -303,30 +313,19 @@ def make_problem(target_times_paths, readings_path, window_size_resample_rule, o
             to `%m/%d/%y %H:%M:%S`.
         filename_fmt (str):
             Filename format. Defaults to `%Y-%m.csv`.
-        unstack (bool):
-            Whether to unstack the resampled data, generating one column per signal. Only used
-            when resampling. Defaults to `False`.
+        n_workers (int):
+        dashboard_address (str):
     """
-    cluster = LocalCluster(n_workers=16, dashboard_address=':9792')
-    client = Client(cluster)
-
+    client = _create_client(n_workers, dashboard_address)
     generated_problems = list()
+    target_times_paths = as_list(target_times_paths)
 
-    for tt_path in tqdm(target_times_paths):
-        if parse_dates:
-            parse_dates = [parse_dates] if not isinstance(parse_dates, list) else parse_dates
-            target_times = pd.read_csv(tt_path, parse_dates=parse_dates)
-        else:
-            target_times = pd.read_csv(tt_path)
-
+    for tt_path in tqdm(target_times_paths):
         for window_size, rule in window_size_resample_rule:
-            csv_loader = CSVLoader(
+            new_target_times, readings = _generate_target_times_readings(
+                tt_path,
                 readings_path,
-                rule=rule,
-                aggregation=aggregation,
-                unstack=unstack,
-                datetime_fmt=datetime_fmt,
-            )
-
-            new_target_times, readings = csv_loader.load(
-                target_times,
-                window_size=window_size,
-                signals=signals,
+                rule,
+                aggregation,
+                signals,
+                datetime_fmt,
+                filename_fmt
             )
 
             problem_name = 'problem_{}_{}.pkl'.format(window_size, rule)
             output_pickle_path = os.path.join(output_path, problem_name)
             with open(output_pickle_path, 'wb') as pickle_file:
-                pickle.dump((new_target_times, readings), pickle_file)
+                pickle.dump((new_target_times, readings, window_size, rule), pickle_file)
 
             generated_problems.append(output_pickle_path)
 
-    client.shutdown()
-
     return generated_problems
 
 
-def benchmark():
+def benchmark(templates, problem_paths=None, target_times_paths=None, readings_path=None,
+              window_size_resample_rule=None, signals=None, tuning_iterations=100, preprocessing=0,
+              init_params=None, aggregation='mean', cost=False, cv_splits=5, metric='f1',
+              test_size=0.33, random_state=0, cache_path=None, n_workers=16,
+              dashboard_address=':9792', output_path=None, datetime_fmt='%m/%d/%y %H:%M:%S',
+              filename_fmt='%Y-%m.csv'):
     """
     Args:
-        pipelines (list):
+        templates (list):
         problem_paths (list):
         target_times_paths (list):
-        readings_path (list):
+        readings_path (str):
         window_size_resample_rule (list):
             List of tuples (int, str or Timedelta object).
+        signals (str):
+            List of signal names or csv file that has a `signal_id` column to use as the signal
+            names list.
+        aggregation (str):
+            Aggregation to apply to the readings.
+        datetime_fmt (str):
+            Date format used by the column timestamp for the readings. Defaults
+            to `%m/%d/%y %H:%M:%S`.
+        n_workers
+        dashboard_address
+        filename_fmt (str):
+            Filename format. Defaults to `%Y-%m.csv`.
tuning_iterations (int):
+        preprocessing :
+        init_params :
+        cost :
+        test_size :
         cv_splits (int):
         cache_path (str):
         output_path (str):
     """
+    templates = as_list(templates)
+    results = list()
+
+    if target_times_paths:
+        target_times_paths = as_list(target_times_paths)
+        if not readings_path:
+            raise ValueError('Missing readings path.')
+
+        client = _create_client(n_workers, dashboard_address)
+
+        for tt_path in tqdm(target_times_paths):
+            for window_size, rule in window_size_resample_rule:
+                target_times, readings = _generate_target_times_readings(
+                    tt_path,
+                    readings_path,
+                    rule,
+                    aggregation,
+                    signals,
+                    datetime_fmt,
+                    filename_fmt,
+                )
+
+                df = evaluate_templates(
+                    templates,
+                    [(window_size, rule)],
+                    metric=metric,
+                    tuning_iterations=tuning_iterations,
+                    init_params=init_params,
+                    target_times=target_times,
+                    readings=readings,
+                    preprocessing=preprocessing,
+                    cost=cost,
+                    test_size=test_size,
+                    cv_splits=cv_splits,
+                    random_state=random_state,
+                    cache_path=cache_path,
+                    output_path=None
+                )
+
+                results.append(df)
+
+        client.shutdown()
+
+    else:
+        problem_paths = as_list(problem_paths)
+
+        for problem_path in tqdm(problem_paths):
+            with open(problem_path, 'rb') as pickle_file:
+                target_times, readings, window_size, rule = pickle.load(pickle_file)
+
+            df = evaluate_templates(
+                templates,
+                [(window_size, rule)],
+                metric=metric,
+                tuning_iterations=tuning_iterations,
+                init_params=init_params,
+                target_times=target_times,
+                readings=readings,
+                preprocessing=preprocessing,
+                cost=cost,
+                test_size=test_size,
+                cv_splits=cv_splits,
+                random_state=random_state,
+                cache_path=cache_path,
+                output_path=None
+            )
+
+            results.append(df)
+
+    results = pd.concat(results, ignore_index=True)
+
+    if output_path:
+        results.to_csv(output_path)
+
+    return results
diff --git a/greenguard/utils.py b/greenguard/utils.py
index a803f97..15d46e1 100644
--- a/greenguard/utils.py
+++ b/greenguard/utils.py
@@ -53,3 +53,11 @@ def logging_setup(verbosity=1, logfile=None, logger_name=None):
     console_handler.setLevel(log_level)
     console_handler.setFormatter(formatter)
     logger.addHandler(console_handler)
+
+
+def as_list(param):
+    """Make sure that param is a ``list``."""
+    if isinstance(param, (list, tuple)):
+        return param
+
+    return [param]
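
A minimal sketch of how this draft entry point could be invoked on a problem
previously pickled by `make_problem`; the template name and pickle path are
placeholders, and the returned DataFrame follows the `evaluate_templates`
report layout:

```python
from greenguard.benchmark import benchmark

# 'problem_3d_4h.pkl' matches the 'problem_{window_size}_{rule}.pkl' naming
# used above; each pickle already carries its own window size and rule.
results = benchmark(
    templates=['normalize_dfs_xgb_classifier'],
    problem_paths=['problems/problem_3d_4h.pkl'],
    tuning_iterations=10,
    cv_splits=3,
)
```
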
From 55dcb1905b4a6bcf4f30c1d216ef439f2c44e075 Mon Sep 17 00:00:00 2001
From: Plamen Valentinov Kolev
Date: Mon, 21 Sep 2020 20:01:09 +0200
Subject: [PATCH 094/171] Review Data Format and Structure

---
 DATA_FORMAT.md | 119 +++++++++++++++++++++++++++++++++++++++++++++++++
 README.md      |   7 ++-
 2 files changed, 125 insertions(+), 1 deletion(-)
 create mode 100644 DATA_FORMAT.md

diff --git a/DATA_FORMAT.md b/DATA_FORMAT.md
new file mode 100644
index 0000000..b0fc5a3
--- /dev/null
+++ b/DATA_FORMAT.md
@@ -0,0 +1,119 @@
+# GreenGuard Data Format
+
+## Input
+
+The minimum input expected by the **GreenGuard** system consists of the following two elements,
+which need to be passed as `pandas.DataFrame` objects:
+
+### Target Times
+
+A table containing the specification of the problem that we are solving, which has three
+columns:
+
+* `turbine_id`: Unique identifier of the turbine which this label corresponds to.
+* `cutoff_time`: Time associated with this target.
+* `target`: The value that we want to predict. This can either be a numerical value or a
+  categorical label. This column can also be skipped when preparing data that will be used
+  only to make predictions and not to fit any pipeline.
+
+|    | turbine_id   | cutoff_time         | target   |
+|----|--------------|---------------------|----------|
+|  0 | T1           | 2001-01-02 00:00:00 | 0        |
+|  1 | T1           | 2001-01-03 00:00:00 | 1        |
+|  2 | T2           | 2001-01-04 00:00:00 | 0        |
+
+### Readings
+
+A table containing the signal data from the different sensors, with the following columns:
+
+ * `turbine_id`: Unique identifier of the turbine which this reading comes from.
+ * `signal_id`: Unique identifier of the signal which this reading comes from.
+ * `timestamp (datetime)`: Time where the reading took place, as a datetime.
+ * `value (float)`: Numeric value of this reading.
+
+|    | turbine_id   | signal_id   | timestamp           |   value |
+|----|--------------|-------------|---------------------|---------|
+|  0 | T1           | S1          | 2001-01-01 00:00:00 |       1 |
+|  1 | T1           | S1          | 2001-01-01 12:00:00 |       2 |
+|  2 | T1           | S1          | 2001-01-02 00:00:00 |       3 |
+|  3 | T1           | S1          | 2001-01-02 12:00:00 |       4 |
+|  4 | T1           | S1          | 2001-01-03 00:00:00 |       5 |
+|  5 | T1           | S1          | 2001-01-03 12:00:00 |       6 |
+|  6 | T1           | S2          | 2001-01-01 00:00:00 |       7 |
+|  7 | T1           | S2          | 2001-01-01 12:00:00 |       8 |
+|  8 | T1           | S2          | 2001-01-02 00:00:00 |       9 |
+|  9 | T1           | S2          | 2001-01-02 12:00:00 |      10 |
+| 10 | T1           | S2          | 2001-01-03 00:00:00 |      11 |
+| 11 | T1           | S2          | 2001-01-03 12:00:00 |      12 |
+
+### Turbines
+
+Optionally, a third table can be added containing metadata about the turbines.
+The only requirement for this table is to have a `turbine_id` field, and it can have
+an arbitrary number of additional fields.
+
+|    | turbine_id   | manufacturer   | ...   | ...   | ...   |
+|----|--------------|----------------|-------|-------|-------|
+|  0 | T1           | Siemens        | ...   | ...   | ...   |
+|  1 | T2           | Siemens        | ...   | ...   | ...   |
+
+
+## CSV Format
+
+As explained in a previous section, the input expected by the **GreenGuard** system consists of
+two tables which need to be passed as `pandas.DataFrame` objects:
+
+* The `target_times` table, which contains the specification of the problem that we are solving
+  in the form of training examples with a `turbine_id`, a `cutoff_time` and a `target` value.
+* The `readings` table, which contains the signal readings from the different sensors, with
+  `turbine_id`, `signal_id`, `timestamp` and `value` fields.
+
+However, in most scenarios the size of the available data will far exceed the memory limitations
+of the system on which **GreenGuard** is being run, so loading all the data in a single
+`pandas.DataFrame` will not be possible.
+
+In order to solve this situation, **GreenGuard** provides a [CSVLoader](
+https://d3-ai.github.io/GreenGuard/api/greenguard.loaders.csv.html#greenguard.loaders.csv.CSVLoader)
+class which can be used to load data from what we call the **Raw Data Format**.
+
+### Raw Data Format
+
+The **Raw Data Format** consists of a collection of CSV files stored in a single folder with the
+following structure:
+
+#### Folder Structure
+
+* All the data from all the turbines is inside a single folder, which here we will call `readings`.
+* Inside the `readings` folder, one folder exists for each turbine, named exactly like the turbine:
+    * `readings/T001`
+    * `readings/T002`
+    * ...
+* Inside each turbine folder one CSV file exists for each month, named `%Y-%m.csv`.
+    * `readings/T001/2010-01.csv`
+    * `readings/T001/2010-02.csv`
+    * `readings/T001/2010-03.csv`
+    * ...
+
+#### CSV Contents
+
+* Each CSV file contains three columns:
+    * `signal_id`: name or id of the signal.
+    * ``timestamp``: timestamp of the reading formatted as ``%m/%d/%y %H:%M:%S``.
+    * `value`: value of the reading.
+
+This is an example of what a CSV's contents look like:
+
+|    | signal_id   | timestamp         |   value |
+|----|-------------|-------------------|---------|
+|  0 | S1          | 01/01/01 00:00:00 |       1 |
+|  1 | S1          | 01/01/01 12:00:00 |       2 |
+|  2 | S1          | 01/02/01 00:00:00 |       3 |
+|  3 | S1          | 01/02/01 12:00:00 |       4 |
+|  4 | S1          | 01/03/01 00:00:00 |       5 |
+|  5 | S1          | 01/03/01 12:00:00 |       6 |
+|  6 | S2          | 01/01/01 00:00:00 |       7 |
+|  7 | S2          | 01/01/01 12:00:00 |       8 |
+|  8 | S2          | 01/02/01 00:00:00 |       9 |
+|  9 | S2          | 01/02/01 12:00:00 |      10 |
+| 10 | S2          | 01/03/01 00:00:00 |      11 |
+| 11 | S2          | 01/03/01 12:00:00 |      12 |
diff --git a/README.md b/README.md
index e585050..4e4b086 100644
--- a/README.md
+++ b/README.md
@@ -26,7 +26,7 @@ AutoML for Renewable Energy Industries.
 - Documentation: https://signals-dev.github.io/GreenGuard
 - Homepage: https://github.com/signals-dev/GreenGuard
 
-# Overview
+## Overview
 
 The GreenGuard project is a collection of end-to-end solutions for machine learning problems
 commonly found in monitoring wind energy production systems. Most tasks utilize sensor data
@@ -44,6 +44,11 @@ The salient aspects of this customized project are:
 * A robust continuous integration and testing infrastructure.
 * A ``learning database`` recording all past outcomes --> tasks, pipelines, outcomes.
 
+## Resources
+
+* [Data Format](DATA_FORMAT.md).
+* [GreenGuard folder structure](DATA_FORMAT.md#folder-structure).
+
 # Install
 
 ## Requirements

From c887f4e7a908e8e80178cb5a64632a49de8f141f Mon Sep 17 00:00:00 2001
From: Plamen Valentinov Kolev
Date: Mon, 21 Sep 2020 20:09:07 +0200
Subject: [PATCH 095/171] Update links

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 4e4b086..adbc11e 100644
--- a/README.md
+++ b/README.md
@@ -141,7 +141,7 @@
 Apart from the in-memory data format explained above, which is limited by the memory
 allocation capabilities of the system where it is run, **GreenGuard** is also prepared to
 load and work with data stored as a collection of CSV files, drastically increasing the amount
 of data which it can work with. Further details about this format can be found in the
-[project documentation site](https://signals-dev.github.io/GreenGuard/advanced_usage/csv.html).
+[project documentation site](DATA_FORMAT.md#csv-format).
 
 # Quickstart

From d176a3527e7e2541c959464b041c527e70df5a7e Mon Sep 17 00:00:00 2001
From: Plamen Valentinov Kolev
Date: Tue, 22 Sep 2020 01:56:50 +0200
Subject: [PATCH 096/171] Curate dependencies

---
 setup.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 8ffd589..7872309 100644
--- a/setup.py
+++ b/setup.py
@@ -16,11 +16,13 @@
 history = ''
 
 install_requires = [
+    'boto3==1.14.44',
+    'botocore==1.17.44',
+    'baytune>=0.3.9,<0.4',
     'Keras>=2.1.6,<2.4',
     'mlblocks>=0.3.4,<0.4',
     'mlprimitives>=0.2.5,<0.3',
     'scipy>=1.0.1,<1.4.0',
-    'baytune>=0.3.9,<0.4',
     'numpy>=1.15.4,<1.17',
     'pymongo>=3.7.2,<4',
     'scikit-learn>=0.20.1,<0.21',

From 0526bb5f8cf31f393dfdaacd075bbcd561f33733 Mon Sep 17 00:00:00 2001
From: Plamen Valentinov Kolev
Date: Fri, 25 Sep 2020 15:22:32 +0200
Subject: [PATCH 097/171] Add release notes for v0.2.4

---
 HISTORY.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/HISTORY.md b/HISTORY.md
index 0607be3..abc8226 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,5 +1,9 @@
 # History
 
+## 0.2.4 - 2020-09-25
+
+* Fix dependency errors
+
 ## 0.2.3 - 2020-08-10
 
 * Added benchmarking module.
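
Following the Raw Data Format documented in the patch above, a target times CSV
plus a readings folder can be loaded through the `CSVLoader` used throughout
this series. A minimal sketch, with placeholder paths and illustrative `rule`
and `window_size` values:

```python
import pandas as pd

from greenguard.loaders import CSVLoader

# 'target_times.csv' and 'readings' are placeholder paths; 'readings' is
# expected to follow the folder structure described in DATA_FORMAT.md.
target_times = pd.read_csv('target_times.csv', parse_dates=['cutoff_time'])

csv_loader = CSVLoader('readings', rule='4h', aggregation='mean')
target_times, readings = csv_loader.load(target_times, window_size='3d')
```

Only the readings that fall inside each training example's window are kept in
memory, which is what makes this path viable for datasets that do not fit in a
single DataFrame.
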
From e5a4c389193e108348202b138988925c6a89c4b3 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Fri, 25 Sep 2020 15:22:40 +0200 Subject: [PATCH 098/171] =?UTF-8?q?Bump=20version:=200.2.4.dev0=20?= =?UTF-8?q?=E2=86=92=200.2.4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- greenguard/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/greenguard/__init__.py b/greenguard/__init__.py index 20eb561..80c8ff7 100644 --- a/greenguard/__init__.py +++ b/greenguard/__init__.py @@ -4,7 +4,7 @@ __author__ = """MIT Data To AI Lab""" __email__ = 'dailabmit@gmail.com' -__version__ = '0.2.4.dev0' +__version__ = '0.2.4' import os diff --git a/setup.cfg b/setup.cfg index 44d3a12..3d700c2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.2.4.dev0 +current_version = 0.2.4 commit = True tag = True parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? diff --git a/setup.py b/setup.py index 7872309..c1ee678 100644 --- a/setup.py +++ b/setup.py @@ -105,6 +105,6 @@ test_suite='tests', tests_require=tests_require, url='https://github.com/D3-AI/GreenGuard', - version='0.2.4.dev0', + version='0.2.4', zip_safe=False, ) From f8649547f6e6659d103acd862a994d7a6a040955 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Fri, 25 Sep 2020 16:41:00 +0200 Subject: [PATCH 099/171] =?UTF-8?q?Bump=20version:=200.2.4=20=E2=86=92=200?= =?UTF-8?q?.2.5.dev0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- greenguard/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/greenguard/__init__.py b/greenguard/__init__.py index 80c8ff7..4b2b765 100644 --- a/greenguard/__init__.py +++ b/greenguard/__init__.py @@ -4,7 +4,7 @@ __author__ = """MIT Data To AI Lab""" __email__ = 'dailabmit@gmail.com' -__version__ = '0.2.4' +__version__ = '0.2.5.dev0' import os diff --git a/setup.cfg b/setup.cfg index 3d700c2..5fdd02b 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.2.4 +current_version = 0.2.5.dev0 commit = True tag = True parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? 
diff --git a/setup.py b/setup.py
index c1ee678..a7cd421 100644
--- a/setup.py
+++ b/setup.py
@@ -105,6 +105,6 @@
     test_suite='tests',
     tests_require=tests_require,
     url='https://github.com/D3-AI/GreenGuard',
-    version='0.2.4',
+    version='0.2.5.dev0',
     zip_safe=False,
 )

From dc30fb3ab4754787f9a0ade7429c56091c74859d Mon Sep 17 00:00:00 2001
From: Plamen Valentinov Kolev
Date: Tue, 29 Sep 2020 20:12:02 +0200
Subject: [PATCH 100/171] Add command line for benchmark

---
 greenguard/benchmark.py | 167 +++++++++++++++++++++++++++++-----------
 setup.py                |   1 +
 2 files changed, 88 insertions(+), 80 deletions(-)

diff --git a/greenguard/benchmark.py b/greenguard/benchmark.py
index 281039a..d59f370 100644
--- a/greenguard/benchmark.py
+++ b/greenguard/benchmark.py
@@ -1,10 +1,12 @@
+import argparse
 import logging
 import os
 import pickle
+import sys
+import warnings
 from itertools import product
 
 import pandas as pd
-from dask.distributed import Client, LocalCluster
 from sklearn.model_selection import train_test_split
 from tqdm import tqdm
 
@@ -17,12 +19,6 @@
 LOGGER = logging.getLogger(__name__)
 
 
-def _create_client(n_workers=16, dashboard_address=':9792'):
-    cluster = LocalCluster(n_workers=n_workers, dashboard_address=dashboard_address)
-    client = Client(cluster)
-    return client
-
-
 def _build_init_params(template, window_size, rule, template_params):
@@ -287,9 +283,7 @@
-def _generate_target_times_readings(target_times, readings_path, window_size, rule,
-                                    signals, aggregation, datetime_fmt='%m/%d/%y %H:%M:%S',
-                                    filename_fmt='%Y-%m.csv'):
+def _generate_target_times_readings(target_times, readings_path, window_size, rule, signals):
     """
     Returns:
         pandas.DataFrame:
             Table of readings for the target times, including the columns ``turbine_id``,
             ``signal_id``, ``timestamp`` and ``value``.
     """
     csv_loader = CSVLoader(
         readings_path,
         rule=rule,
-        aggregation=aggregation,
-        datetime_fmt=datetime_fmt,
-        filename_fmt=filename_fmt,
     )
 
     return csv_loader.load(target_times, window_size=window_size, signals=signals)
 
 
-def make_problem(target_times_paths, readings_path, window_size_resample_rule, output_path,
-                 signals=None, aggregation='mean', datetime_fmt='%m/%d/%y %H:%M:%S',
-                 filename_fmt='%Y-%m.csv', n_workers=16, dashboard_address=':9792'):
+def make_problem(target_times_paths, readings_path, window_size_resample_rule,
+                 output_path, signals=None):
     """
     Args:
         target_times_paths (list):
             List of paths to CSVs that contain target times.
         readings_path (str):
             Path to the folder where readings in raw CSV format can be found.
         window_size_resample_rule (list):
             List of tuples (int, str or Timedelta object).
         output_path (str):
             Path to save the generated problems.
         signals (str):
             List of signal names or csv file that has a `signal_id` column to use as the signal
             names list.
-        aggregation (str):
-            Aggregation to apply to the readings.
-        datetime_fmt (str):
-            Date format used by the column timestamp for the readings. Defaults
-            to `%m/%d/%y %H:%M:%S`.
-        filename_fmt (str):
-            Filename format. Defaults to `%Y-%m.csv`.
-        n_workers (int):
-        dashboard_address (str):
     """
-    client = _create_client(n_workers, dashboard_address)
     generated_problems = list()
     target_times_paths = as_list(target_times_paths)
 
-    for tt_path in tqdm(target_times_paths):
+    for target_time_path in tqdm(target_times_paths):
         for window_size, rule in window_size_resample_rule:
+            target_times = pd.read_csv(target_time_path, parse_dates=['cutoff_time'])
             new_target_times, readings = _generate_target_times_readings(
-                tt_path,
+                target_times,
                 readings_path,
+                window_size,
                 rule,
-                aggregation,
-                signals,
-                datetime_fmt,
-                filename_fmt
+                signals=signals,
             )
 
             problem_name = 'problem_{}_{}.pkl'.format(window_size, rule)
@@ -328,12 +335,17 @@
             generated_problems.append(output_pickle_path)
 
-    client.shutdown()
-
     return generated_problems
 
 
-def benchmark(templates, problem_paths=None, target_times_paths=None, readings_path=None,
-              window_size_resample_rule=None, signals=None, tuning_iterations=100, preprocessing=0,
-              init_params=None, aggregation='mean', cost=False, cv_splits=5, metric='f1',
-              test_size=0.33, random_state=0, cache_path=None, n_workers=16,
-              dashboard_address=':9792', output_path=None, datetime_fmt='%m/%d/%y %H:%M:%S',
-              filename_fmt='%Y-%m.csv'):
+def benchmark(templates, problem_paths=None, target_times_paths=None, readings_path=None,
+              window_size_resample_rule=None, signals=None, tuning_iterations=100,
+              preprocessing=None, init_params=None, cost=False, cv_splits=5, metric='f1',
+              test_size=0.33, random_state=0, cache_path=None, output_path=None):
     """
     Args:
         templates (list):
         problem_paths (list):
         target_times_paths (list):
         readings_path (str):
         window_size_resample_rule (list):
             List of tuples (int, str or Timedelta object).
         signals (str):
             List of signal names or csv file that has a `signal_id` column to use as the signal
             names list.
-        aggregation (str):
-            Aggregation to apply to the readings.
-        datetime_fmt (str):
-            Date format used by the column timestamp for the readings. Defaults
-            to `%m/%d/%y %H:%M:%S`.
-        n_workers
-        dashboard_address
-        filename_fmt (str):
-            Filename format. Defaults to `%Y-%m.csv`.
tuning_iterations (int): preprocessing : init_params : @@ -394,7 +360,6 @@ def benchmark(templates, problem_paths=None, target_times_paths=None, readings_p test_size : cv_splits (int): cache_path (str): - output_path (str): """ templates = as_list(templates) @@ -402,21 +367,17 @@ def benchmark(templates, problem_paths=None, target_times_paths=None, readings_p if target_times_paths: target_times_paths = as_list(target_times_paths) - if not readings_path: + if readings_path is None: raise ValueError('Missing readings path.') - client = _create_client(n_workers, dashboard_adress) - for tt_path in tqdm(target_times_paths): for window_size, rule in window_size_resample_rule: + target_times = pd.read_csv(tt_path, parse_dates=['cutoff_time']) target_times, readings = _generate_target_times_readings( tt_path, readings_path, rule, - aggregation, signals, - datetime_fmt, - filename_fmt, ) df = evaluate_templates( @@ -438,37 +399,83 @@ def benchmark(templates, problem_paths=None, target_times_paths=None, readings_p results.append(df) - client.shutdown() - else: - problem_paths = as_list(problem_paths) + problem_paths = as_list(problem_paths) for problem_path in tqdm(problem_paths): with open(problem_path, 'rb') as pickle_file: - target_times, readings, window_size, rule = pickle.load(pickle_file) - - df = evaluate_templates( - templates, - [(window_size, rule)], - metric=metric, - tuning_iterations=tuning_iterations, - init_params=init_params, - target_times=target_times, - readings=readings, - preprocessing=preprocessing, - cost=cost, - test_size=test_size, - cv_splits=cv_splits, - random_state=random_state, - cache_path=cache_path, - output_path=None - ) - - results.append(df) + target_times, readings, pickle_window_size, pickle_rule = pickle.load(pickle_file) + + if window_size_resample_rule is None: + window_size_resample_rule = [(pickle_window_size, pickle_rule)] + + for window_size, resample_rule in window_size_resample_rule: + + # window_size can be only smaller than pickle window size + # resample rule can be only bigger than picke rule + if (pd.to_timedelta(pickle_window_size) >= pd.to_timedelta(window_size) + and pd.to_timedelta(pickle_rule) <= pd.to_timedelta(resample_rule)): # noqa W503 + + df = evaluate_templates( + templates, + [(window_size, rule)], + metric=metric, + tuning_iterations=tuning_iterations, + init_params=init_params, + target_times=target_times, + readings=readings, + preprocessing=preprocessing, + cost=cost, + test_size=test_size, + cv_splits=cv_splits, + random_state=random_state, + cache_path=cache_path, + output_path=None + ) + + results.append(df) + + else: + msg = (f'Invalid window size of {window_size} for {pickle_window_size}' + f' or invalid resample rule {resample_rule} for {pickle_rule}.') + LOGGER.info(msg) results = pd.concat(results, ignore_index=True) if output_path: - results.to_csv(output_path) + results.to_csv(output_path, index=False) + + else: + return results + + +def _get_parser(): + parser = argparse.ArgumentParser(description='GreenGuard Benchmark Command Line Interface.') + parser.set_defaults(action=benchmark) + + # Add arguments + parser.add_argument('-t', '--templates', nargs='+', help='List of templates to try.') + parser.add_argument('-p', '--problems', nargs='+', help='Paths to problems to be benchmarked.') + parser.add_argument('-w', '--window-size-resample-rule', nargs='+', + help='List of tuples with window size and resample rule to benchmark.') + parser.add_argument('-i', '--tuning-iterations', type=int, default=100, + help='Number of 
tuning iterations to perform per problem per pipeline.') + + return parser + + +def main(): + warnings.filterwarnings("ignore") + + # Parse args + parser = _get_parser() + args = parser.parse_args() + if args.templates is None: + parser.print_help() + sys.exit(0) + + args.action(**args) + - return results +if __name__ == '__main__': + main() diff --git a/setup.py b/setup.py index 8ffd589..098b67a 100644 --- a/setup.py +++ b/setup.py @@ -16,6 +16,7 @@ history = '' install_requires = [ + 'matplotlib<3.2.2', 'Keras>=2.1.6,<2.4', 'mlblocks>=0.3.4,<0.4', 'mlprimitives>=0.2.5,<0.3', From d66d52cf1b3394a0eed85fc11a9124a52643a3a2 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Mon, 5 Oct 2020 19:41:11 +0200 Subject: [PATCH 101/171] Rename make_problems, export them with the given problem name and update return values if no output_path is given --- greenguard/benchmark.py | 40 ++++++++++++++++++++++++++-------------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/greenguard/benchmark.py b/greenguard/benchmark.py index d59f370..a441a94 100644 --- a/greenguard/benchmark.py +++ b/greenguard/benchmark.py @@ -298,8 +298,8 @@ def _generate_target_times_readings(target_times, readings_path, window_size, ru return csv_loader.load(target_times, window_size=window_size, signals=signals) -def make_problem(target_times_paths, readings_path, window_size_resample_rule, - output_path, signals=None): +def make_problems(target_times_paths, readings_path, window_size_resample_rule, + output_path=None, signals=None): """ Args: target_times_paths (list): @@ -314,10 +314,17 @@ def make_problem(target_times_paths, readings_path, window_size_resample_rule, List of signal names or csv file that has a `signal_id` column to use as the signal names list. """ - generated_problems = list() - target_times_paths = as_list(target_times_paths) + if isinstance(target_times_paths, str): + target_times_paths = [target_times_paths] + if isinstance(target_times_paths, list): + target_times_paths = {os.path.basename(path)[:-4]: path for path in target_times_paths} - for target_time_path in tqdm(target_times_paths): + if output_path: + generated_problems = list() + else: + generated_problems = {} + + for name, target_time_path in tqdm(target_times_paths.values()): for window_size, rule in window_size_resample_rule: target_times = pd.read_csv(target_time_path, parse_dates=['cutoff_time']) new_target_times, readings = _generate_target_times_readings( @@ -328,12 +335,17 @@ def make_problem(target_times_paths, readings_path, window_size_resample_rule, signals=signals, ) - problem_name = 'problem_{}_{}.pkl'.format(window_size, rule) - output_pickle_path = os.path.join(output_path, problem_name) - with open(output_pickle_path, 'wb') as pickle_file: - pickle.dump((new_target_times, readings, window_size, rule), pickle_file) + problem_name = '{}_{}_{}.pkl'.format(name, window_size, rule) - generated_problems.append(output_pickle_path) + if output_path: + output_pickle_path = os.path.join(output_path, problem_name) + with open(output_pickle_path, 'wb') as pickle_file: + pickle.dump((new_target_times, readings, window_size, rule), pickle_file) + + generated_problems.append(output_pickle_path) + + else: + generated_problems[problem_name] = (new_target_times, readings, window_size, rule) return generated_problems @@ -371,18 +383,18 @@ def benchmark(templates, problem_paths=None, target_times_paths=None, readings_p raise ValueError('Missing readings path.') for tt_path in tqdm(target_times_paths): - for window_size, rule 
in window_size_resample_rule: + for window_size, resample_rule in window_size_resample_rule: target_times = pd.read_csv(tt_path, parse_dates=['cutoff_time']) target_times, readings = _generate_target_times_readings( tt_path, readings_path, - rule, + resample_rule, signals, ) df = evaluate_templates( templates, - [(window_size, rule)], + [(window_size, resample_rule)], metric=metric, tuning_iterations=tuning_iterations, init_params=init_params, @@ -418,7 +430,7 @@ def benchmark(templates, problem_paths=None, target_times_paths=None, readings_p df = evaluate_templates( templates, - [(window_size, rule)], + [(window_size, resample_rule)], metric=metric, tuning_iterations=tuning_iterations, init_params=init_params, From 321932fe9a3a6865d46fdf26b467ed4ea6fb8009 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Wed, 7 Oct 2020 13:17:05 +0200 Subject: [PATCH 102/171] Benchmarking and command line implementation --- greenguard/benchmark.py | 276 ++++++++++++++++++++++++++++------------ 1 file changed, 193 insertions(+), 83 deletions(-) diff --git a/greenguard/benchmark.py b/greenguard/benchmark.py index a441a94..3747653 100644 --- a/greenguard/benchmark.py +++ b/greenguard/benchmark.py @@ -4,16 +4,20 @@ import pickle import sys import warnings +from datetime import datetime from itertools import product import pandas as pd +import tabulate from sklearn.model_selection import train_test_split from tqdm import tqdm +from greenguard import get_pipelines from greenguard.demo import load_demo from greenguard.loaders import CSVLoader from greenguard.metrics import METRICS from greenguard.pipeline import GreenGuardPipeline, generate_init_params, generate_preprocessing +from greenguard.results import summarize_results from greenguard.utils import as_list LOGGER = logging.getLogger(__name__) @@ -97,7 +101,10 @@ def evaluate_template(template, target_times, readings, metric='f1', tuning_iter scores (dict): Stores the four types of scores that are being evaluate. 
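+
+    Example:
+        A minimal sketch of a call, using the bundled demo data. The template
+        name refers to one of the pipelines shipped with the project, and the
+        low iteration count is only for illustration:
+
+        >>> from greenguard.demo import load_demo
+        >>> target_times, readings = load_demo()
+        >>> scores = evaluate_template(
+        ...     'unstack_normalize_dfs_xgb_classifier',
+        ...     target_times,
+        ...     readings,
+        ...     tuning_iterations=5,
+        ... )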
""" + start_time = datetime.utcnow() + scores = dict() + scores['metric'] = metric train, test = train_test_split(target_times, test_size=test_size, random_state=random_state) @@ -115,14 +122,18 @@ def evaluate_template(template, target_times, readings, metric='f1', tuning_iter ) # Computing the default test score + fit_predict_time = datetime.utcnow() pipeline.fit(train, readings) predictions = pipeline.predict(test, readings) + fit_predict_time = datetime.utcnow() - fit_predict_time scores['default_test'] = metric(test['target'], predictions) # Computing the default cross validation score + cv_time = datetime.utcnow() session = pipeline.tune(train, readings) session.run(1) + cv_time = datetime.utcnow() - cv_time scores['default_cv'] = pipeline.cv_score @@ -136,14 +147,17 @@ def evaluate_template(template, target_times, readings, metric='f1', tuning_iter predictions = pipeline.predict(test, readings) scores['tuned_test'] = metric(test['target'], predictions) + scores['fit_predict_time'] = fit_predict_time + scores['cv_time'] = cv_time + scores['total_time'] = datetime.utcnow() - start_time return scores -def evaluate_templates(templates, window_size_rule, metric='f1', - tuning_iterations=50, init_params=None, target_times=None, - readings=None, preprocessing=0, cost=False, test_size=0.25, - cv_splits=3, random_state=0, cache_path=None, output_path=None): +def evaluate_templates(templates, window_size_rule, metric='f1', tuning_iterations=50, + init_params=None, target_times=None, readings=None, preprocessing=0, + cost=False, test_size=0.25, cv_splits=3, random_state=0, cache_path=None, + cache_results=None, problem_name=None, output_path=None): """Execute the benchmark process and optionally store the result as a ``CSV``. Args: @@ -239,6 +253,7 @@ def evaluate_templates(templates, window_size_rule, metric='f1', window_size, rule = window_rule scores = dict() + scores['problem_name'] = problem_name scores['template'] = template scores['window_size'] = window_size scores['resample_rule'] = rule @@ -270,11 +285,34 @@ def evaluate_templates(templates, window_size_rule, metric='f1', scores['status'] = 'ERRORED' LOGGER.exception('Could not score template %s ', template) + if cache_results: + os.makedirs(cache_results, exist_ok=True) + template_name = template + if os.path.isfile(template_name): + template_name = os.path.basename(template_name).replace('.json', '') + + file_name = '{}_{}_{}_{}.csv'.format(problem_name, template_name, window_size, rule) + pd.DataFrame([scores]).to_csv(os.path.join(cache_results, file_name), index=False) + scores_list.append(scores) results = pd.DataFrame.from_records(scores_list) - results = results.reindex(['template', 'window_size', 'resample_rule', 'default_test', - 'default_cv', 'tuned_cv', 'tuned_test', 'status'], axis=1) + columns = [ + 'problem_name', + 'window_size', + 'resample_rule', + 'template', + 'default_test', + 'default_cv', + 'tuned_cv', + 'tuned_test', + 'fit_predict_time', + 'cv_time', + 'total_time', + 'status', + ] + + results = results.reindex(columns, axis=1) if output_path: LOGGER.info('Saving benchmark report to %s', output_path) @@ -317,14 +355,17 @@ def make_problems(target_times_paths, readings_path, window_size_resample_rule, if isinstance(target_times_paths, str): target_times_paths = [target_times_paths] if isinstance(target_times_paths, list): - target_times_paths = {os.path.basename(path)[:-4]: path for path in target_times_paths} + target_times_paths = { + os.path.basename(path).replace('.csv', ''): path + for path in 
target_times_paths + } if output_path: generated_problems = list() else: generated_problems = {} - for name, target_time_path in tqdm(target_times_paths.values()): + for problem_name, target_time_path in tqdm(target_times_paths.values()): for window_size, rule in window_size_resample_rule: target_times = pd.read_csv(target_time_path, parse_dates=['cutoff_time']) new_target_times, readings = _generate_target_times_readings( @@ -335,7 +376,7 @@ def make_problems(target_times_paths, readings_path, window_size_resample_rule, signals=signals, ) - problem_name = '{}_{}_{}.pkl'.format(name, window_size, rule) + problem_name = '{}_{}_{}.pkl'.format(problem_name, window_size, rule) if output_path: output_pickle_path = os.path.join(output_path, problem_name) @@ -350,21 +391,16 @@ def make_problems(target_times_paths, readings_path, window_size_resample_rule, return generated_problems -def benchmark(templates, problem_paths=None, target_times_paths=None, readings_path=None, - window_size_resample_rule=None, signals=None, tuning_iterations=100, - preprocessing=None, init_params=None, cost=False, cv_splits=5, metric='f1', - test_size=0.33, random_state=0, cache_path=None, output_path=None): +def run_benchmark(templates, problem_paths=None, window_size_resample_rule=None, + tuning_iterations=100, preprocessing=0, init_params=None, cost=False, + cv_splits=5, metric='f1', test_size=0.33, random_state=0, cache_path=None, + output_path=None, cache_results=None): """ Args: templates (list): problem_paths (list): - target_times_paths (list): - readings_path (str): window_size_resample_rule (list): List of tupples (int, str or Timedelta object). - signals (str): - List of signal names or csv file that has a `signal_id` column to use as the signal - names list. tuning_iterations (int): preprocessing : init_params : @@ -376,21 +412,23 @@ def benchmark(templates, problem_paths=None, target_times_paths=None, readings_p """ templates = as_list(templates) results = list() + problem_paths = as_list(problem_paths) - if target_times_paths: - target_times_paths = as_list(target_times_paths) - if readings_path is None: - raise ValueError('Missing readings path.') - - for tt_path in tqdm(target_times_paths): - for window_size, resample_rule in window_size_resample_rule: - target_times = pd.read_csv(tt_path, parse_dates=['cutoff_time']) - target_times, readings = _generate_target_times_readings( - tt_path, - readings_path, - resample_rule, - signals, - ) + for problem_path in tqdm(problem_paths): + with open(problem_path, 'rb') as pickle_file: + target_times, readings, pickle_window_size, pickle_rule = pickle.load(pickle_file) + + problem_name = '_'.join(os.path.basename(problem_path).split('_')[:-2]) + + if window_size_resample_rule is None: + window_size_resample_rule = [(pickle_window_size, pickle_rule)] + + for window_size, resample_rule in window_size_resample_rule: + + # window_size can be only smaller than pickle window size + # resample rule can be only bigger than picke rule + if (pd.to_timedelta(pickle_window_size) >= pd.to_timedelta(window_size) + and pd.to_timedelta(pickle_rule) <= pd.to_timedelta(resample_rule)): # noqa W503 df = evaluate_templates( templates, @@ -406,51 +444,22 @@ def benchmark(templates, problem_paths=None, target_times_paths=None, readings_p cv_splits=cv_splits, random_state=random_state, cache_path=cache_path, + cache_results=cache_results, + problem_name=problem_name, output_path=None ) results.append(df) - else: + if cache_results: + file_name = '{}_{}_{}.csv'.format(problem_name, 
window_size, resample_rule) + df.to_csv(os.path.join(cache_results, file_name), index=False) + + else: + msg = 'Invalid window size or resample rule {}.'.format( + (window_size, pickle_window_size, resample_rule, pickle_rule)) - problem_paths = as_list(problem_paths) - for problem_path in tqdm(problem_paths): - with open(problem_path, 'rb') as pickle_file: - target_times, readings, pickle_window_size, pickle_rule = pickle.load(pickle_file) - - if window_size_resample_rule is None: - window_size_resample_rule = [(pickle_window_size, pickle_rule)] - - for window_size, resample_rule in window_size_resample_rule: - - # window_size can be only smaller than pickle window size - # resample rule can be only bigger than picke rule - if (pd.to_timedelta(pickle_window_size) >= pd.to_timedelta(window_size) - and pd.to_timedelta(pickle_rule) <= pd.to_timedelta(resample_rule)): # noqa W503 - - df = evaluate_templates( - templates, - [(window_size, resample_rule)], - metric=metric, - tuning_iterations=tuning_iterations, - init_params=init_params, - target_times=target_times, - readings=readings, - preprocessing=preprocessing, - cost=cost, - test_size=test_size, - cv_splits=cv_splits, - random_state=random_state, - cache_path=cache_path, - output_path=None - ) - - results.append(df) - - else: - msg = (f'Invalid window size of {window_size} for {pickle_window_size}' - f' or invalid resample rule {resample_rule} for {pickle_rule}.') - LOGGER.info(msg) + LOGGER.info(msg) results = pd.concat(results, ignore_index=True) @@ -461,17 +470,118 @@ def benchmark(templates, problem_paths=None, target_times_paths=None, readings_p return results +def _run(args): + # Logger setup + log_level = (3 - args.verbose) * 10 + fmt = '%(asctime)s - %(process)d - %(levelname)s - %(name)s - %(module)s - %(message)s' + logging.basicConfig(level=log_level, format=fmt) + logging.getLogger("botocore").setLevel(logging.ERROR) + logging.getLogger("hyperopt").setLevel(logging.ERROR) + logging.getLogger("ax").setLevel(logging.ERROR) + logging.getLogger("urllib3").setLevel(logging.CRITICAL) + + if args.templates is None: + args.templates = get_pipelines() + + window_size_resample_rule = None + if args.window_size: + window_size_resample_rule = list(product(args.window_size, args.resample_rule)) + + # run + results = run_benchmark( + templates=args.templates, + problem_paths=args.problem_paths, + window_size_resample_rule=window_size_resample_rule, + cv_splits=args.cv_splits, + metric=args.metric, + test_size=args.test_size, + random_state=args.random_state, + cache_path=args.cache_path, + cache_results=args.cache_results, + tuning_iterations=args.iterations, + output_path=args.output_path, + ) + + if not args.output_path: + print(tabulate.tabulate( + results, + tablefmt='github', + headers=results.columns + )) + + +def _summary(args): + summarize_results(args.input, args.output) + + +def _create(args): + window_size_resample_rule = list(product(args.window_size, args.resample_rule)) + make_problems( + args.target_times_paths, + args.readings_path, + window_size_resample_rule, + output_path=args.output_path, + signals=args.signals + ) + + def _get_parser(): parser = argparse.ArgumentParser(description='GreenGuard Benchmark Command Line Interface.') - parser.set_defaults(action=benchmark) - - # Add arguments - parser.add_argument('-t', '--templates', nargs='+', help='List of templates to try.') - parser.add_argument('-p', '--problems', nargs='+', help='Paths to problems to be benchmarked.') - parser.add_argument('-w', 
'--window-size-resample-rule', nargs='+', - help='List of tuples with window size and resample rule to benchmark.') - parser.add_argument('-i', '--tuning-iterations', type=int, default=100, - help='Number of tuning iterations to perform per problem per pipeline.') + parser.set_defaults(action=None) + action = parser.add_subparsers(title='action') + action.required = True + + # Run action + run = action.add_parser('run', help='Run the GreenGuard Benchmark') + run.set_defaults(action=_run) + run.set_defaults(user=None) + + run.add_argument('-v', '--verbose', action='http://github.com/count', default=0, + help='Be verbose. Use -vv for increased verbosity.') + run.add_argument('-t', '--templates', nargs='+', + help='Perform benchmarking over the given list of templates.') + run.add_argument('-p', '--problem-paths', nargs='+', required=False, + help='Perform benchmarking over a list of pkl problems.') + run.add_argument('-w', '--window-size', nargs='+', required=False, + help='List of window sizes values to benchmark.') + run.add_argument('-r', '--resample-rule', nargs='+', required=False, + help='List of resample rule to benchmark.') + run.add_argument('-o', '--output_path', type=str, + help='Output path where to store the results.') + run.add_argument('-s', '--cv-splits', type=int, default=5, + help='Amount of cross validation splits to use.') + run.add_argument('-m', '--metric', type=str, default='f1', + help='Name of metric function to be used during benchmarking.') + run.add_argument('-n', '--random-state', type=int, default=0, + help='Random state for the cv splits.') + run.add_argument('-e', '--test-size', type=float, default=0.33, + help='Percentage of the data set to be used for the test.') + run.add_argument('-c', '--cache-path', type=str, + help='Path to cache the generated cross validation splits in.') + run.add_argument('-R', '--cache-results', type=str, + help='Path to store the csv files for each problem and template.') + run.add_argument('-i', '--iterations', type=int, default=100, + help='Number of iterations to perform per challenge with each candidate.') + + # Summarize action + summary = action.add_parser('summary', help='Summarize the GreenGuard Benchmark results') + summary.set_defaults(action=_summary) + summary.add_argument('input', nargs='+', help='Input path with results.') + summary.add_argument('output', help='Output file.') + + # Create action + create = action.add_parser('create', help='Create GreenGuard problems') + create.set_defaults(action=_create) + create.add_argument('target-times-paths', nargs='+', help='List of target times paths.') + create.add_argument('readings-path', type=str, help='Path to the readings folder.') + create.add_argument('-w', '--window-size', nargs='+', required=False, + help='List of window sizes values to benchmark.') + create.add_argument('-r', '--resample-rule', nargs='+', required=False, + help='List of resample rule to benchmark.') + create.add_argument('-o', '--output', type=str, + help='Output path where to save the generated problems.') + create.add_argument('-s', '--signals', type=str, + help='Path to csv file that has signal_id column to use as the signal') return parser @@ -481,12 +591,12 @@ def main(): # Parse args parser = _get_parser() - args = parser.parse_args() - if args.templates is None: + if len(sys.argv) < 2: parser.print_help() sys.exit(0) - args.action(**args) + args = parser.parse_args() + args.action(args) if __name__ == '__main__': From 043009662196a666741a36ecb645365cae69d16e Mon Sep 17 00:00:00 2001 
From: Plamen Valentinov Kolev Date: Wed, 7 Oct 2020 17:44:20 +0200 Subject: [PATCH 103/171] Allow dictionary usage as problems --- greenguard/benchmark.py | 95 +++++++++++++++++++++++------------------ setup.py | 2 + 2 files changed, 56 insertions(+), 41 deletions(-) diff --git a/greenguard/benchmark.py b/greenguard/benchmark.py index 3747653..c29af4b 100644 --- a/greenguard/benchmark.py +++ b/greenguard/benchmark.py @@ -2,6 +2,7 @@ import logging import os import pickle +import re import sys import warnings from datetime import datetime @@ -17,11 +18,25 @@ from greenguard.loaders import CSVLoader from greenguard.metrics import METRICS from greenguard.pipeline import GreenGuardPipeline, generate_init_params, generate_preprocessing -from greenguard.results import summarize_results from greenguard.utils import as_list LOGGER = logging.getLogger(__name__) +LEADERBOARD_COLUMNS = [ + 'problem_name', + 'window_size', + 'resample_rule', + 'template', + 'default_test', + 'default_cv', + 'tuned_cv', + 'tuned_test', + 'fit_predict_time', + 'cv_time', + 'total_time', + 'status', +] + def _build_init_params(template, window_size, rule, template_params): if 'dfs' in template: @@ -241,7 +256,6 @@ def evaluate_templates(templates, window_size_rule, metric='f1', tuning_iteratio 3 normalize_dfs_xgb_classifier 7d 4h 0.581818 0.619698 0.650367 0.603774 OK """ # noqa - if readings is None and target_times is None: target_times, readings = load_demo() @@ -292,27 +306,13 @@ def evaluate_templates(templates, window_size_rule, metric='f1', tuning_iteratio template_name = os.path.basename(template_name).replace('.json', '') file_name = '{}_{}_{}_{}.csv'.format(problem_name, template_name, window_size, rule) - pd.DataFrame([scores]).to_csv(os.path.join(cache_results, file_name), index=False) + df = pd.DataFrame([scores]).reindex(LEADERBOARD_COLUMNS, axis=1) + df.to_csv(os.path.join(cache_results, file_name), index=False) scores_list.append(scores) results = pd.DataFrame.from_records(scores_list) - columns = [ - 'problem_name', - 'window_size', - 'resample_rule', - 'template', - 'default_test', - 'default_cv', - 'tuned_cv', - 'tuned_test', - 'fit_predict_time', - 'cv_time', - 'total_time', - 'status', - ] - - results = results.reindex(columns, axis=1) + results = results.reindex(LEADERBOARD_COLUMNS, axis=1) if output_path: LOGGER.info('Saving benchmark report to %s', output_path) @@ -365,7 +365,7 @@ def make_problems(target_times_paths, readings_path, window_size_resample_rule, else: generated_problems = {} - for problem_name, target_time_path in tqdm(target_times_paths.values()): + for problem_name, target_time_path in tqdm(target_times_paths.items()): for window_size, rule in window_size_resample_rule: target_times = pd.read_csv(target_time_path, parse_dates=['cutoff_time']) new_target_times, readings = _generate_target_times_readings( @@ -376,22 +376,22 @@ def make_problems(target_times_paths, readings_path, window_size_resample_rule, signals=signals, ) - problem_name = '{}_{}_{}.pkl'.format(problem_name, window_size, rule) + pickle_name = '{}_{}_{}'.format(problem_name, window_size, rule) if output_path: - output_pickle_path = os.path.join(output_path, problem_name) + output_pickle_path = os.path.join(output_path, pickle_name + '.pkl') with open(output_pickle_path, 'wb') as pickle_file: pickle.dump((new_target_times, readings, window_size, rule), pickle_file) generated_problems.append(output_pickle_path) else: - generated_problems[problem_name] = (new_target_times, readings, window_size, rule) + 
generated_problems[pickle_name] = (new_target_times, readings, window_size, rule) return generated_problems -def run_benchmark(templates, problem_paths=None, window_size_resample_rule=None, +def run_benchmark(templates, problems=None, window_size_resample_rule=None, tuning_iterations=100, preprocessing=0, init_params=None, cost=False, cv_splits=5, metric='f1', test_size=0.33, random_state=0, cache_path=None, output_path=None, cache_results=None): @@ -412,23 +412,30 @@ def run_benchmark(templates, problem_paths=None, window_size_resample_rule=None, """ templates = as_list(templates) results = list() - problem_paths = as_list(problem_paths) - - for problem_path in tqdm(problem_paths): - with open(problem_path, 'rb') as pickle_file: - target_times, readings, pickle_window_size, pickle_rule = pickle.load(pickle_file) + if isinstance(problems, str): + problems = [problems] + if isinstance(problems, list): + problems = { + '_'.join(os.path.basename(problem).split('_')[:-2]): problem + for problem in problems + } - problem_name = '_'.join(os.path.basename(problem_path).split('_')[:-2]) + for problem_name, problem in tqdm(problems.items()): + if isinstance(problem, str): + with open(problem, 'rb') as pickle_file: + target_times, readings, orig_window_size, orig_rule = pickle.load(pickle_file) + else: + target_times, readings, orig_window_size, orig_rule = problem if window_size_resample_rule is None: - window_size_resample_rule = [(pickle_window_size, pickle_rule)] + window_size_resample_rule = [(orig_window_size, orig_rule)] for window_size, resample_rule in window_size_resample_rule: # window_size can be only smaller than pickle window size # resample rule can be only bigger than picke rule - if (pd.to_timedelta(pickle_window_size) >= pd.to_timedelta(window_size) - and pd.to_timedelta(pickle_rule) <= pd.to_timedelta(resample_rule)): # noqa W503 + if (pd.to_timedelta(orig_window_size) >= pd.to_timedelta(window_size) + and pd.to_timedelta(orig_rule) <= pd.to_timedelta(resample_rule)): # noqa W503 df = evaluate_templates( templates, @@ -457,7 +464,7 @@ def run_benchmark(templates, problem_paths=None, window_size_resample_rule=None, else: msg = 'Invalid window size or resample rule {}.'.format( - (window_size, pickle_window_size, resample_rule, pickle_rule)) + (window_size, orig_window_size, resample_rule, orig_rule)) LOGGER.info(msg) @@ -484,13 +491,17 @@ def _run(args): args.templates = get_pipelines() window_size_resample_rule = None - if args.window_size: - window_size_resample_rule = list(product(args.window_size, args.resample_rule)) + if args.window_size_resample_rule: + pattern = re.compile(r'\d+[DdHhMmSs]') + window_size_resample_rule = [ + tuple(pattern.findall(item)) + for item in args.window_size_resample_rule + ] # run results = run_benchmark( templates=args.templates, - problem_paths=args.problem_paths, + problems=args.problems, window_size_resample_rule=window_size_resample_rule, cv_splits=args.cv_splits, metric=args.metric, @@ -510,6 +521,10 @@ def _run(args): )) +def summarize_results(input_path, output_path): + pass + + def _summary(args): summarize_results(args.input, args.output) @@ -540,12 +555,10 @@ def _get_parser(): help='Be verbose. 
Use -vv for increased verbosity.') run.add_argument('-t', '--templates', nargs='+', help='Perform benchmarking over the given list of templates.') - run.add_argument('-p', '--problem-paths', nargs='+', required=False, + run.add_argument('-p', '--problems', nargs='+', required=False, help='Perform benchmarking over a list of pkl problems.') - run.add_argument('-w', '--window-size', nargs='+', required=False, + run.add_argument('-w', '--window-size-resample-rule', nargs='+', required=False, help='List of window sizes values to benchmark.') - run.add_argument('-r', '--resample-rule', nargs='+', required=False, - help='List of resample rule to benchmark.') run.add_argument('-o', '--output_path', type=str, help='Output path where to store the results.') run.add_argument('-s', '--cv-splits', type=int, default=5, diff --git a/setup.py b/setup.py index f78ff68..b671c9c 100644 --- a/setup.py +++ b/setup.py @@ -16,10 +16,12 @@ history = '' install_requires = [ + 'xlsxwriter>=1.3.6<1.4', 'matplotlib<3.2.2', 'boto3==1.14.44', 'botocore==1.17.44', 'baytune>=0.3.9,<0.4', + 'tabulate>=0.8.3,<0.9', 'Keras>=2.1.6,<2.4', 'mlblocks>=0.3.4,<0.4', 'mlprimitives>=0.2.5,<0.3', From 0b0d3bb592c71e1fbbdc39b1e77de2a6c3e81190 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Wed, 7 Oct 2020 19:47:46 +0200 Subject: [PATCH 104/171] WIP --- greenguard/benchmark.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/greenguard/benchmark.py b/greenguard/benchmark.py index c29af4b..4d6e397 100644 --- a/greenguard/benchmark.py +++ b/greenguard/benchmark.py @@ -416,11 +416,14 @@ def run_benchmark(templates, problems=None, window_size_resample_rule=None, problems = [problems] if isinstance(problems, list): problems = { - '_'.join(os.path.basename(problem).split('_')[:-2]): problem + os.path.basename(problem).replace('.pkl', ''): problem for problem in problems } for problem_name, problem in tqdm(problems.items()): + # remove window_size resample_rule nomenclature from the problem's name + problem_name = re.sub(r'\_\d+[DdHhMmSs]', r'', problem_name) + if isinstance(problem, str): with open(problem, 'rb') as pickle_file: target_times, readings, orig_window_size, orig_rule = pickle.load(pickle_file) From be89b4c5355a4db252f49540e38a5fb7698fcba7 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Thu, 8 Oct 2020 10:19:49 +0200 Subject: [PATCH 105/171] Update documentation and add logging. 
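
The -v/--verbose count from the command line is now translated into standard
logging levels by the new _setup_logging helper. As a rough sketch of the
mapping implied by the `(3 - args.verbose) * 10` expression in this patch:

    # -v count -> logging level
    #    0     -> 30 (WARNING)
    #    1     -> 20 (INFO)
    #    2     -> 10 (DEBUG)
    log_level = (3 - args.verbose) * 10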
--- greenguard/benchmark.py | 214 ++++++++++++++++++++++++++++++---------- 1 file changed, 164 insertions(+), 50 deletions(-) diff --git a/greenguard/benchmark.py b/greenguard/benchmark.py index 4d6e397..9bbfea7 100644 --- a/greenguard/benchmark.py +++ b/greenguard/benchmark.py @@ -18,7 +18,6 @@ from greenguard.loaders import CSVLoader from greenguard.metrics import METRICS from greenguard.pipeline import GreenGuardPipeline, generate_init_params, generate_preprocessing -from greenguard.utils import as_list LOGGER = logging.getLogger(__name__) @@ -31,6 +30,7 @@ 'default_cv', 'tuned_cv', 'tuned_test', + 'metric', 'fit_predict_time', 'cv_time', 'total_time', @@ -172,14 +172,14 @@ def evaluate_template(template, target_times, readings, metric='f1', tuning_iter def evaluate_templates(templates, window_size_rule, metric='f1', tuning_iterations=50, init_params=None, target_times=None, readings=None, preprocessing=0, cost=False, test_size=0.25, cv_splits=3, random_state=0, cache_path=None, - cache_results=None, problem_name=None, output_path=None): + cache_results=None, problem_name=None, output_path=None, progress_bar=None): """Execute the benchmark process and optionally store the result as a ``CSV``. Args: templates (list): List of templates to try. window_size_rule (list): - List of tupples (int, str or Timedelta object). + List of tuples (int, str or Timedelta object). metric (function or str): Metric to use. If an ``str`` is give it must be one of the metrics defined in the ``greenguard.metrics.METRICS`` dictionary. @@ -273,10 +273,14 @@ def evaluate_templates(templates, window_size_rule, metric='f1', tuning_iteratio scores['resample_rule'] = rule try: + LOGGER.info('Evaluating template %s on problem %s (%s, %s)', + template, problem_name, window_size, rule) + template_params = init_params[template] template_params = _build_init_params(template, window_size, rule, template_params) template_preprocessing = preprocessing[template] + result = evaluate_template( template=template, target_times=target_times, @@ -310,6 +314,7 @@ def evaluate_templates(templates, window_size_rule, metric='f1', tuning_iteratio df.to_csv(os.path.join(cache_results, file_name), index=False) scores_list.append(scores) + progress_bar.update(1) results = pd.DataFrame.from_records(scores_list) results = results.reindex(LEADERBOARD_COLUMNS, axis=1) @@ -338,14 +343,25 @@ def _generate_target_times_readings(target_times, readings_path, window_size, ru def make_problems(target_times_paths, readings_path, window_size_resample_rule, output_path=None, signals=None): - """ + """Make problems with the target times and readings for each window size and resample rule. + + Create problems in the accepted format by ``run_benchmark`` as pickle files containing: + + * ``target_times``: ``pandas.DataFrame`` containing the target times. + * ``readings``: ``pandas.DataFrame`` containing the readings for the target times. + * ``window_size``: window size value used. + * ``resample_rule``: resample rule value used. + + Or return a ``dict`` containing as keys the names of the problems generated and tuples with + the previously specified fields of target times, readings, window size and resample rule. + Args: target_times_paths (list): List of paths to CSVs that contain target times. readings_path (str): Path to the folder where readings in raw CSV format can be found. window_size_resample_rule (list): - List of tupples (int, str or Timedelta object). + List of tuples (int, str or Timedelta object). 
output_path (str):
            Path to save the generated problems.
        signals (str):
@@ -365,6 +381,9 @@ def make_problems(target_times_paths, readings_path, window_size_resample_rule,
     else:
         generated_problems = {}

+    if isinstance(signals, str) and os.path.exists(signals):
+        signals = pd.read_csv(signals).signal_id
+
     for problem_name, target_time_path in tqdm(target_times_paths.items()):
         for window_size, rule in window_size_resample_rule:
             target_times = pd.read_csv(target_time_path, parse_dates=['cutoff_time'])
@@ -379,6 +398,7 @@ def make_problems(target_times_paths, readings_path, window_size_resample_rule,
             pickle_name = '{}_{}_{}'.format(problem_name, window_size, rule)

             if output_path:
+                os.makedirs(output_path, exist_ok=True)
                 output_pickle_path = os.path.join(output_path, pickle_name + '.pkl')
                 with open(output_pickle_path, 'wb') as pickle_file:
                     pickle.dump((new_target_times, readings, window_size, rule), pickle_file)
@@ -391,26 +411,95 @@ def make_problems(target_times_paths, readings_path, window_size_resample_rule,
     return generated_problems


-def run_benchmark(templates, problems=None, window_size_resample_rule=None,
-                  tuning_iterations=100, preprocessing=0, init_params=None, cost=False,
-                  cv_splits=5, metric='f1', test_size=0.33, random_state=0, cache_path=None,
-                  output_path=None, cache_results=None):
-    """
+def run_benchmark(templates, problems, window_size_resample_rule=None,
+                  tuning_iterations=50, signals=None, preprocessing=0, init_params=None,
+                  metric='f1', cost=False, cv_splits=5, test_size=0.33, random_state=0,
+                  cache_path=None,cache_results=None, output_path=None):
+    """Execute the benchmark function and optionally store the result as a ``CSV``.
+
+    This function provides a user-friendly interface to interact with the
+    ``evaluate_templates`` function. It allows the user to specify an ``output_path``
+    where the results can be stored. If this path is not provided, a
+    ``pandas.DataFrame`` will be returned.
+
+    This function evaluates each template against each problem for each possible
+    window size and resample rule, and will tune each template for the given amount
+    of tuning iterations.
+
+    The problems can be a pickle file that contains the following values:
+
+        * ``target_times``: ``pandas.DataFrame`` containing the target times.
+        * ``readings``: ``pandas.DataFrame`` containing the readings for the target times.
+        * ``window_size``: window size value used.
+        * ``resample_rule``: resample rule value used.
+
+    Or it can be a dictionary containing the problem's name and as values either a path
+    to a pickle file or a tuple containing the previously specified fields.
+
     Args:
-        templates (list):
-        problem_paths (list):
+        templates (str or list):
+            Name of the json pipelines that will be evaluated against the problems.
+        problems (str, list or dict):
+            There are three possible values for problems:
+
+                * ``str``: Path to a given problem stored as a pickle file (pkl).
+                * ``list``: List of paths to given problems stored as pickle files (pkl).
+                * ``dict``: A dict containing as keys the name of the problem and as value
+                  the path to a pickle file or a tuple with target times and readings data
+                  frames and the window size and resample rule used to generate this
+                  problem.
+
+            The pickle files have to contain a tuple with target times and readings data
+            frames and the window size and resample rule used to generate that problem.
+            We recommend using the function ``make_problems`` to generate those files.
+
        window_size_resample_rule (list):
+            List of tuples (int, str or Timedelta object).
        tuning_iterations (int):
-        preprocessing :
-        init_params :
-        cost :
-        test_size :
+            Amount of tuning iterations to perform over each template.
+        signals (str or list):
+            Path to a csv file containing a ``signal_id`` column that we would like to
+            use, or a ``list`` of signals that we would like to use. If ``None``, use
+            all the signals from the readings.
+        preprocessing (int, dict or list):
+            There are three possible values for preprocessing:
+
+                * ``int``: the value will be used for all templates.
+                * ``dict`` with the template name as a key and a number as a value, which
+                  will be used for that template.
+                * ``list``: each value will be assigned to the corresponding position of
+                  self.templates.
+
+            Defaults to ``0``.
+        init_params (dict or list):
+            There are three possible values for init_params:
+
+                * Init params ``dict``: It will be used for all templates.
+                * ``dict`` with the name of the template as a key and a dictionary with
+                  its init params.
+                * ``list``: each value will be assigned to the corresponding position of
+                  self.templates.
+
+            Defaults to ``None``.
+        metric (function or str):
+            Metric to use. If an ``str`` is given, it must be one of the metrics
+            defined in the ``greenguard.metrics.METRICS`` dictionary.
+        cost (bool):
+            Whether the metric is a cost function (the lower the better) or not.
+            Defaults to ``False``.
        cv_splits (int):
+            Number of cross validation folds to use. Defaults to ``5``.
+        test_size (float):
+            Amount of data that will be saved for test, represented as a percentage
+            between 0 and 1.
+        random_state (int or RandomState):
+            Random state to use for the cross validation partitioning. Defaults to ``0``.
        cache_path (str):
+            If given, cache the generated cross validation splits in this folder.
+            Defaults to ``None``.
+        cache_results (str):
+            If provided, store the progress of each pipeline and each problem while
+            running.
        output_path (str):
+            If provided, store the results to the given filename. Defaults to ``None``.
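+
+    Example:
+        A minimal usage sketch. The paths below are illustrative placeholders, and
+        the template name refers to one of the pipelines bundled with the project:
+
+        >>> from greenguard.benchmark import make_problems, run_benchmark
+        >>> problems = make_problems(
+        ...     target_times_paths=['path/to/target_times.csv'],
+        ...     readings_path='path/to/readings',
+        ...     window_size_resample_rule=[('1d', '1h')],
+        ... )
+        >>> results = run_benchmark(
+        ...     templates='unstack_normalize_dfs_xgb_classifier',
+        ...     problems=problems,
+        ...     tuning_iterations=10,
+        ... )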
""" - templates = as_list(templates) + templates = templates if isinstance(templates, (list, tuple)) else [templates] results = list() if isinstance(problems, str): problems = [problems] @@ -420,7 +509,14 @@ def run_benchmark(templates, problems=None, window_size_resample_rule=None, for problem in problems } - for problem_name, problem in tqdm(problems.items()): + if signals is not None: + if isinstance(signals, str) and os.path.exists(signals): + signals = pd.read_csv(signals).signal_id + + total_runs = len(templates) * len(problems) * len(window_size_resample_rule or [1]) + pbar = tqdm(total=total_runs) + + for problem_name, problem in problems.items(): # remove window_size resample_rule nomenclature from the problem's name problem_name = re.sub(r'\_\d+[DdHhMmSs]', r'', problem_name) @@ -430,15 +526,20 @@ def run_benchmark(templates, problems=None, window_size_resample_rule=None, else: target_times, readings, orig_window_size, orig_rule = problem - if window_size_resample_rule is None: - window_size_resample_rule = [(orig_window_size, orig_rule)] + if signals is not None: + readings = readings[readings.signal_id.isin(signals)] + + wsrr = window_size_resample_rule or [(orig_window_size, orig_rule)] + + orig_window_size = pd.to_timedelta(orig_window_size) + orig_rule = pd.to_timedelta(orig_rule) - for window_size, resample_rule in window_size_resample_rule: + for window_size, resample_rule in wsrr: # window_size can be only smaller than pickle window size # resample rule can be only bigger than picke rule - if (pd.to_timedelta(orig_window_size) >= pd.to_timedelta(window_size) - and pd.to_timedelta(orig_rule) <= pd.to_timedelta(resample_rule)): # noqa W503 + if (orig_window_size >= pd.to_timedelta(window_size) + and orig_rule <= pd.to_timedelta(resample_rule)): # noqa W503 df = evaluate_templates( templates, @@ -456,7 +557,8 @@ def run_benchmark(templates, problems=None, window_size_resample_rule=None, cache_path=cache_path, cache_results=cache_results, problem_name=problem_name, - output_path=None + output_path=None, + progress_bar=pbar ) results.append(df) @@ -466,30 +568,36 @@ def run_benchmark(templates, problems=None, window_size_resample_rule=None, df.to_csv(os.path.join(cache_results, file_name), index=False) else: + pbar.update(1) + msg = 'Invalid window size or resample rule {}.'.format( (window_size, orig_window_size, resample_rule, orig_rule)) - LOGGER.info(msg) + LOGGER.warn(msg) - results = pd.concat(results, ignore_index=True) + pbar.close() + results = pd.concat(results, ignore_index=True) if output_path: + os.makedirs(output_path, exist_ok=True) results.to_csv(output_path, index=False) else: return results -def _run(args): +def _setup_logging(args): # Logger setup log_level = (3 - args.verbose) * 10 fmt = '%(asctime)s - %(process)d - %(levelname)s - %(name)s - %(module)s - %(message)s' - logging.basicConfig(level=log_level, format=fmt) + logging.basicConfig(filename=args.logfile, level=log_level, format=fmt) logging.getLogger("botocore").setLevel(logging.ERROR) - logging.getLogger("hyperopt").setLevel(logging.ERROR) - logging.getLogger("ax").setLevel(logging.ERROR) + logging.getLogger("tensorflow").setLevel(logging.ERROR) logging.getLogger("urllib3").setLevel(logging.CRITICAL) + os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # FATAL + +def _run(args): if args.templates is None: args.templates = get_pipelines() @@ -514,6 +622,7 @@ def _run(args): cache_results=args.cache_results, tuning_iterations=args.iterations, output_path=args.output_path, + signals=args.signals, ) if not 
args.output_path: @@ -528,11 +637,11 @@ def summarize_results(input_path, output_path): pass -def _summary(args): +def _summarize_results(args): summarize_results(args.input, args.output) -def _create(args): +def _make_problems(args): window_size_resample_rule = list(product(args.window_size, args.resample_rule)) make_problems( args.target_times_paths, @@ -556,6 +665,8 @@ def _get_parser(): run.add_argument('-v', '--verbose', action='http://github.com/count', default=0, help='Be verbose. Use -vv for increased verbosity.') + run.add_argument('-l', '--logfile', + help='Log file.') run.add_argument('-t', '--templates', nargs='+', help='Perform benchmarking over the given list of templates.') run.add_argument('-p', '--problems', nargs='+', required=False, @@ -578,33 +689,34 @@ def _get_parser(): help='Path to store the csv files for each problem and template.') run.add_argument('-i', '--iterations', type=int, default=100, help='Number of iterations to perform per challenge with each candidate.') + run.add_argument('-S', '--signals', type=str, + help='Path to csv file that has signal_id column to use as the signal') # Summarize action - summary = action.add_parser('summary', help='Summarize the GreenGuard Benchmark results') - summary.set_defaults(action=_summary) + summary = action.add_parser('summarize-results', + help='Summarize the GreenGuard Benchmark results') + summary.set_defaults(action=_summarize_results) summary.add_argument('input', nargs='+', help='Input path with results.') summary.add_argument('output', help='Output file.') - # Create action - create = action.add_parser('create', help='Create GreenGuard problems') - create.set_defaults(action=_create) - create.add_argument('target-times-paths', nargs='+', help='List of target times paths.') - create.add_argument('readings-path', type=str, help='Path to the readings folder.') - create.add_argument('-w', '--window-size', nargs='+', required=False, - help='List of window sizes values to benchmark.') - create.add_argument('-r', '--resample-rule', nargs='+', required=False, - help='List of resample rule to benchmark.') - create.add_argument('-o', '--output', type=str, - help='Output path where to save the generated problems.') - create.add_argument('-s', '--signals', type=str, - help='Path to csv file that has signal_id column to use as the signal') + # Make problems action + problems = action.add_parser('make-problems', help='Create GreenGuard problems') + problems.set_defaults(action=_make_problems) + problems.add_argument('target-times-paths', nargs='+', help='List of target times paths.') + problems.add_argument('readings-path', type=str, help='Path to the readings folder.') + problems.add_argument('-w', '--window-size', nargs='+', required=False, + help='List of window sizes values to benchmark.') + problems.add_argument('-r', '--resample-rule', nargs='+', required=False, + help='List of resample rule to benchmark.') + problems.add_argument('-o', '--output', type=str, + help='Output path where to save the generated problems.') + problems.add_argument('-s', '--signals', type=str, + help='Path to csv file that has signal_id column to use as the signal') return parser def main(): - warnings.filterwarnings("ignore") - # Parse args parser = _get_parser() if len(sys.argv) < 2: @@ -612,8 +724,10 @@ def main(): sys.exit(0) args = parser.parse_args() + _setup_logging(args) args.action(args) if __name__ == '__main__': + warnings.filterwarnings("ignore") main() From 9884001ee822f9aab629e8a172a3b1ad34bf309a Mon Sep 17 00:00:00 2001 
From: Plamen Valentinov Kolev Date: Thu, 8 Oct 2020 10:20:13 +0200 Subject: [PATCH 106/171] Fix lint --- greenguard/benchmark.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/greenguard/benchmark.py b/greenguard/benchmark.py index 9bbfea7..1731c58 100644 --- a/greenguard/benchmark.py +++ b/greenguard/benchmark.py @@ -280,7 +280,6 @@ def evaluate_templates(templates, window_size_rule, metric='f1', tuning_iteratio template_params = _build_init_params(template, window_size, rule, template_params) template_preprocessing = preprocessing[template] - result = evaluate_template( template=template, target_times=target_times, @@ -414,7 +413,7 @@ def make_problems(target_times_paths, readings_path, window_size_resample_rule, def run_benchmark(templates, problems, window_size_resample_rule=None, tuning_iterations=50, signals=None, preprocessing=0, init_params=None, metric='f1', cost=False, cv_splits=5, test_size=0.33, random_state=0, - cache_path=None,cache_results=None, output_path=None): + cache_path=None, cache_results=None, output_path=None): """Execute the benchmark function and optionally store the result as a ``CSV``. This function provides a user-friendly interface to interact with the ``evaluate_templates`` From 28780f8e725cb8f41308e45d7d9d3cfe833028ea Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Thu, 8 Oct 2020 10:21:39 +0200 Subject: [PATCH 107/171] Update dependencies --- setup.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/setup.py b/setup.py index 3fe60b7..f29bf8f 100644 --- a/setup.py +++ b/setup.py @@ -16,20 +16,23 @@ history = '' install_requires = [ - 'xlsxwriter>=1.3.6<1.4', - 'matplotlib<3.2.2', - 'boto3==1.14.44', - 'botocore==1.17.44', 'baytune>=0.3.9,<0.4', - 'tabulate>=0.8.3,<0.9', - 'Keras>=2.1.6,<2.4', 'mlblocks>=0.3.4,<0.4', 'mlprimitives>=0.2.5,<0.3', + 'pymongo>=3.7.2,<4', + 'scikit-learn>=0.20.0,<0.21', + 'tqdm<4.50.0,>=4.36.1', + 'cloudpickle>=1.6,<2', 'scipy>=1.0.1,<1.4.0', 'numpy>=1.15.4,<1.17', - 'pymongo>=3.7.2,<4', - 'scikit-learn>=0.20.1,<0.21', - 'dask>=2.6.0,<3' + 'pandas>=0.23.4,<0.25', + 'dask>=2.6.0,<3', + 'Keras>=2.1.6,<2.4', + 'tabulate>=0.8.3,<0.9', + 'xlsxwriter>=1.3.6<1.4', + #'matplotlib<3.2.2', + 'boto3==1.14.44', + 'botocore==1.17.44', ] setup_requires = [ @@ -71,6 +74,7 @@ # Advanced testing 'coverage>=4.5.1,<6', 'tox>=2.9.1,<4', + 'importlib-metadata<2,>=0.12', ] setup( From 6aa8ed69d5bf88e4aa94c7dd8938e1fb7ed897ad Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Thu, 8 Oct 2020 10:21:59 +0200 Subject: [PATCH 108/171] Turn off verbosity --- greenguard/pipelines/unstack_normalize_dfs_xgb_classifier.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/greenguard/pipelines/unstack_normalize_dfs_xgb_classifier.json b/greenguard/pipelines/unstack_normalize_dfs_xgb_classifier.json index b0550ee..5c82d77 100644 --- a/greenguard/pipelines/unstack_normalize_dfs_xgb_classifier.json +++ b/greenguard/pipelines/unstack_normalize_dfs_xgb_classifier.json @@ -42,7 +42,7 @@ "encode": false, "max_depth": -1, "copy": true, - "verbose": true, + "verbose": false, "n_jobs": 1, "training_window": "1d" } From 254b264512cb4adcc319c5db269f26faa371c1fe Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Thu, 8 Oct 2020 10:29:44 +0200 Subject: [PATCH 109/171] Load pipeline from a file --- greenguard/pipeline.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py index a46c2c6..4d5fb86 100644 
--- a/greenguard/pipeline.py
+++ b/greenguard/pipeline.py
@@ -283,7 +283,11 @@ def _get_templates(self, templates):
         for template in templates:
             if isinstance(template, str):
                 template_name = template
-                template = deepcopy(load_pipeline(template_name))
+                if os.path.isfile(template):
+                    with open(template, 'r') as json_file:
+                        template = json.load(json_file)
+                else:
+                    template = deepcopy(load_pipeline(template_name))
             else:
                 template_name = md5(json.dumps(template)).digest()

From 83e2c6217709dac1510b5337970d2df802e5598c Mon Sep 17 00:00:00 2001
From: Plamen Valentinov Kolev
Date: Thu, 8 Oct 2020 14:24:11 +0200
Subject: [PATCH 110/171] Add summarize

---
 greenguard/benchmark.py |  20 ++++++-
 greenguard/results.py   | 119 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 136 insertions(+), 3 deletions(-)
 create mode 100644 greenguard/results.py

diff --git a/greenguard/benchmark.py b/greenguard/benchmark.py
index 1731c58..4d5dd81 100644
--- a/greenguard/benchmark.py
+++ b/greenguard/benchmark.py
@@ -18,6 +18,7 @@
 from greenguard.loaders import CSVLoader
 from greenguard.metrics import METRICS
 from greenguard.pipeline import GreenGuardPipeline, generate_init_params, generate_preprocessing
+from greenguard.results import load_results, write_results

 LOGGER = logging.getLogger(__name__)

@@ -597,6 +598,7 @@ def _setup_logging(args):

 def _run(args):
+    _setup_logging(args)
     if args.templates is None:
         args.templates = get_pipelines()

@@ -632,8 +634,21 @@ def _run(args):
         ))


-def summarize_results(input_path, output_path):
-    pass
+def summarize_results(input_paths, output_path):
+    """Load multiple benchmark results CSV files and compile a summary.
+
+    The result is an Excel file with one tab for each results CSV file
+    and an additional Number of Wins tab with a summary.
+
+    Args:
+        input_paths (list):
+            List of paths to CSV files where the benchmark results are stored.
+            These files must contain the columns produced by ``run_benchmark``.
+        output_path (str):
+            Path, including the filename, where the Excel file will be created.
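+
+    Example:
+        A minimal usage sketch; the file names below are illustrative placeholders:
+
+        >>> summarize_results(
+        ...     ['results/problem_a.csv', 'results/problem_b.csv'],
+        ...     'benchmark_summary.xlsx',
+        ... )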
+ """ + results = load_results(input_paths) + write_results(results, output_path) def _summarize_results(args): @@ -723,7 +738,6 @@ def main(): sys.exit(0) args = parser.parse_args() - _setup_logging(args) args.action(args) diff --git a/greenguard/results.py b/greenguard/results.py new file mode 100644 index 0000000..869c26d --- /dev/null +++ b/greenguard/results.py @@ -0,0 +1,119 @@ +import os + +import pandas as pd + + +def load_results(files): + problems_results = dict() + for filename in files: + problem = os.path.basename(filename).replace('.csv', '') + problems_results[problem] = pd.read_csv(filename, index_col=0).round(6) + + return problems_results + + +def get_wins_by_problems(results): + df = results.groupby('problem_name')['template', 'window_size', 'resample_rule', 'tuned_test'] + df = df.apply(max) + df = df.rename(columns={'tuned_test': 'score'}) + + return df + + +def get_exclusive_wins(scores, column, pivot_columns=['window_size', 'resample_rule']): + summary = {} + for problem in scores.problem_name.unique(): + df = scores[scores['problem_name'] == problem] + df['wr'] = df.apply( + lambda row: '{}_{}'.format(row[pivot_columns[0]], row[pivot_columns[1]]), axis=1) + df = df.pivot(index='wr', columns=column, values='tuned_test') + + is_winner = df.T.rank(method='min', ascending=False) == 1 + num_winners = is_winner.sum() + is_exclusive = num_winners == 1 + is_exclusive_winner = is_winner & is_exclusive + summary[problem] = is_exclusive_winner.sum(axis=1) + + summary_df = pd.DataFrame(summary) + summary_df.index.name = 'template' + columns = summary_df.columns.sort_values(ascending=False) + return summary_df[columns] + + +def add_sheet(dfs, name, writer, cell_fmt, index_fmt, header_fmt): + startrow = 0 + widths = [0] + if not isinstance(dfs, dict): + dfs = {None: dfs} + + for df_name, df in dfs.items(): + df = df.reset_index() + startrow += bool(df_name) + df.to_excel(writer, sheet_name=name, startrow=startrow + 1, index=False, header=False) + + worksheet = writer.sheets[name] + + if df_name: + worksheet.write(startrow - 1, 0, df_name, index_fmt) + widths[0] = max(widths[0], len(df_name)) + + for idx, column in enumerate(df.columns): + worksheet.write(startrow, idx, column, header_fmt) + width = max(len(column), *df[column].astype(str).str.len()) + 1 + if len(widths) > idx: + widths[idx] = max(widths[idx], width) + else: + widths.append(width) + + startrow += len(df) + 2 + + for idx, width in enumerate(widths): + fmt = cell_fmt if idx else index_fmt + worksheet.set_column(idx, idx, width + 1, fmt) + + +def write_results(results, output): + writer = pd.ExcelWriter(output, engine='xlsxwriter') + cell_fmt = writer.book.add_format({ + "font_name": "Arial", + "font_size": "10" + }) + index_fmt = writer.book.add_format({ + "font_name": "Arial", + "font_size": "10", + "bold": True, + }) + header_fmt = writer.book.add_format({ + "font_name": "Arial", + "font_size": "10", + "bold": True, + "bottom": 1 + }) + + if isinstance(results, dict): + results = pd.concat(list(results.values()), ignore_index=True) + + window = get_exclusive_wins(results, 'window_size', ['window_size', 'tuned_test']) + + resample_pivots = ['resample_rule', ['problem_name', 'tuned_test']] + resample = get_exclusive_wins(results, 'resample_rule', resample_pivots) + + summary = { + 'Best pipeline by Problem': get_wins_by_problems(results), + 'Rankings - Number of wins': get_exclusive_wins(results, 'template'), + 'Resample Rule': resample, + 'Window Size': window + } + add_sheet(summary, 'Summary', writer, cell_fmt, 
From d529174f7f6b14d0f59dfb75ae548003b24274d5 Mon Sep 17 00:00:00 2001
From: Plamen Valentinov Kolev
Date: Thu, 8 Oct 2020 15:04:27 +0200
Subject: [PATCH 111/171] Fix tests

---
 greenguard/benchmark.py |  4 +++-
 setup.py                |  1 -
 tests/test_benchmark.py | 14 ++++++++++++--
 3 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/greenguard/benchmark.py b/greenguard/benchmark.py
index 4d5dd81..d4807b8 100644
--- a/greenguard/benchmark.py
+++ b/greenguard/benchmark.py
@@ -314,7 +314,9 @@ def evaluate_templates(templates, window_size_rule, metric='f1', tuning_iteratio
             df.to_csv(os.path.join(cache_results, file_name), index=False)

             scores_list.append(scores)
-            progress_bar.update(1)
+
+            if progress_bar:
+                progress_bar.update(1)

     results = pd.DataFrame.from_records(scores_list)
     results = results.reindex(LEADERBOARD_COLUMNS, axis=1)
diff --git a/setup.py b/setup.py
index f29bf8f..91ac49f 100644
--- a/setup.py
+++ b/setup.py
@@ -30,7 +30,6 @@
     'Keras>=2.1.6,<2.4',
     'tabulate>=0.8.3,<0.9',
     'xlsxwriter>=1.3.6<1.4',
-    #'matplotlib<3.2.2',
     'boto3==1.14.44',
     'botocore==1.17.44',
 ]
diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py
index 4dfe576..c18a565 100644
--- a/tests/test_benchmark.py
+++ b/tests/test_benchmark.py
@@ -32,13 +32,18 @@ def test_predict():

     # assert
     expected_columns = [
-        'template',
+        'problem_name',
         'window_size',
         'resample_rule',
+        'template',
         'default_test',
         'default_cv',
         'tuned_cv',
         'tuned_test',
+        'metric',
+        'fit_predict_time',
+        'cv_time',
+        'total_time',
         'status'
     ]

@@ -46,11 +51,16 @@ def test_predict():
         'object',
         'object',
         'object',
+        'object',
+        'float64',
         'float64',
         'float64',
         'float64',
         'float64',
+        'float64',
+        'float64',
+        'float64',
         'object',
     ]

     assert (scores_df.columns.to_list() == expected_columns)

From 676dcc514bc168c387df061b70a47a6842a572ea Mon Sep 17 00:00:00 2001
From: Plamen Valentinov Kolev
Date: Thu, 8 Oct 2020 17:16:17 +0200
Subject: [PATCH 112/171] Add default cv time and average cv time

---
 greenguard/benchmark.py | 12 ++++++++----
 tests/test_benchmark.py |  4 +++-
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/greenguard/benchmark.py b/greenguard/benchmark.py
index d4807b8..1585b75 100644
--- a/greenguard/benchmark.py
+++ b/greenguard/benchmark.py
@@ -33,7 +33,8 @@
     'tuned_test',
     'metric',
     'fit_predict_time',
-    'cv_time',
+    'default_cv_time',
+    'average_cv_time',
     'total_time',
     'status',
 ]
@@ -146,15 +147,17 @@ def evaluate_template(template, target_times, readings, metric='f1', tuning_iter
     scores['default_test'] = metric(test['target'], predictions)

     # Computing the default cross validation score
-    cv_time = datetime.utcnow()
+    default_cv_time = datetime.utcnow()
     session = pipeline.tune(train, readings)
     session.run(1)
-    cv_time = datetime.utcnow() - cv_time
+    default_cv_time = datetime.utcnow() - default_cv_time

     scores['default_cv'] = pipeline.cv_score

     # Computing the cross validation score with tuned hyperparameters
+    average_cv_time = datetime.utcnow()
     session.run(tuning_iterations)
+    average_cv_time = (datetime.utcnow() - average_cv_time) / tuning_iterations

     scores['tuned_cv'] = pipeline.cv_score

@@ -164,7 +167,8 @@ def evaluate_template(template, target_times, readings, metric='f1', tuning_iter
     scores['tuned_test'] = metric(test['target'], predictions)

     scores['fit_predict_time'] = fit_predict_time
-    scores['cv_time'] = cv_time
+    scores['default_cv_time'] = default_cv_time
+    scores['default_cv_time'] = default_cv_time
     scores['total_time'] = datetime.utcnow() - start_time

     return scores
diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py
index c18a565..a1a2d6f 100644
--- a/tests/test_benchmark.py
+++ b/tests/test_benchmark.py
@@ -42,7 +42,8 @@ def test_predict():
         'tuned_test',
         'metric',
         'fit_predict_time',
-        'cv_time',
+        'default_cv_time',
+        'average_cv_time',
         'total_time',
         'status'
     ]
@@ -60,6 +61,7 @@ def test_predict():
         'float64',
         'float64',
         'float64',
+        'float64',
         'object',
     ]
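The timing added in this patch is plain wall-clock measurement with `datetime.utcnow()`. A self-contained sketch of the same pattern, with `run_one_iteration` as a hypothetical stand-in for `session.run`:

```python
import time
from datetime import datetime

tuning_iterations = 5

def run_one_iteration():
    # Hypothetical stand-in for a single tuning iteration.
    time.sleep(0.1)

start = datetime.utcnow()
for _ in range(tuning_iterations):
    run_one_iteration()

# End minus start yields a positive timedelta; dividing by the number
# of iterations gives the average time spent per tuning iteration.
average_cv_time = (datetime.utcnow() - start) / tuning_iterations
print(average_cv_time)
```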
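The `parse` expression in the `setup.cfg` hunks above and below controls how bumpversion splits a version string into named parts. A quick sanity check of that pattern using only the standard library, with the two version strings these commits move between:

```python
import re

# Same pattern as the setup.cfg `parse` option.
PARSE = (
    r'(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)'
    r'(\.(?P<release>[a-z]+)(?P<candidate>\d+))?'
)

print(re.match(PARSE, '0.2.5.dev0').groupdict())
# {'major': '0', 'minor': '2', 'patch': '5', 'release': 'dev', 'candidate': '0'}
print(re.match(PARSE, '0.2.5').groupdict())
# {'major': '0', 'minor': '2', 'patch': '5', 'release': None, 'candidate': None}
```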
From 39674e3fa8d8a685413162e8133aefaa8c2163eb Mon Sep 17 00:00:00 2001
From: Plamen Valentinov Kolev
Date: Fri, 9 Oct 2020 11:25:09 +0200
Subject: [PATCH 116/171] =?UTF-8?q?Bump=20version:=200.2.5=20=E2=86=92=200?=
 =?UTF-8?q?.2.6.dev0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 greenguard/__init__.py | 2 +-
 setup.cfg              | 2 +-
 setup.py               | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/greenguard/__init__.py b/greenguard/__init__.py
index cc7e309..662545d 100644
--- a/greenguard/__init__.py
+++ b/greenguard/__init__.py
@@ -4,7 +4,7 @@

 __author__ = """MIT Data To AI Lab"""
 __email__ = 'dailabmit@gmail.com'
-__version__ = '0.2.5'
+__version__ = '0.2.6.dev0'

 import os

diff --git a/setup.cfg b/setup.cfg
index 6614256..919f5d6 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.2.5
+current_version = 0.2.6.dev0
 commit = True
 tag = True
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?
diff --git a/setup.py b/setup.py
index c3f7366..3bce783 100644
--- a/setup.py
+++ b/setup.py
@@ -111,6 +111,6 @@
     test_suite='tests',
     tests_require=tests_require,
     url='https://github.com/D3-AI/GreenGuard',
-    version='0.2.5',
+    version='0.2.6.dev0',
     zip_safe=False,
 )

From e0ec043e1d0b4124705e2b22110caf8d8c2ea140 Mon Sep 17 00:00:00 2001
From: Plamen Valentinov Kolev <41479552+pvk-developer@users.noreply.github.com>
Date: Fri, 23 Oct 2020 00:26:16 +0200
Subject: [PATCH 117/171] Intermediate outputs (#52)

* Step by step execution and pipeline inspection notebook

* Add step by step pipeline notebooks.

* Update default value for output_ when predicting.

* Set verbose to false.

* Rerun notebooks.

Co-authored-by: Carles Sala
---
 greenguard/pipeline.py                        |   27 +-
 .../normalize_dfs_xgb_classifier.json         |    2 +-
 .../normalize_dfs_xgb_classifier.ipynb        | 1697 +++++++++++
 ...ck_double_lstm_timeseries_classifier.ipynb | 2501 +++++++++++++++++
 .../unstack_lstm_timeseries_classifier.ipynb  | 2375 ++++++++++++++++
 ...unstack_normalize_dfs_xgb_classifier.ipynb | 1785 ++++++++++++
 6 files changed, 8379 insertions(+), 8 deletions(-)
 create mode 100644 tutorials/pipelines/normalize_dfs_xgb_classifier.ipynb
 create mode 100644 tutorials/pipelines/unstack_double_lstm_timeseries_classifier.ipynb
 create mode 100644 tutorials/pipelines/unstack_lstm_timeseries_classifier.ipynb
 create mode 100644 tutorials/pipelines/unstack_normalize_dfs_xgb_classifier.ipynb

diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py
index 4d5fb86..34504e3 100644
--- a/greenguard/pipeline.py
+++ b/greenguard/pipeline.py
@@ -527,7 +527,8 @@ def tune(self, target_times, readings, turbines=None):
         tunables = self._get_tunables(self._template_dicts)
         return BTBSession(tunables, scoring_function, maximize=not self._cost)

-    def fit(self, target_times, readings, turbines=None):
+    def fit(self, target_times=None, readings=None, turbines=None,
+            start_=None, output_=None, **kwargs):
         """Fit this pipeline to the given data.

         Args:
@@ -539,12 +540,23 @@ def fit(self, target_times, readings, turbines=None):
             turbines (pandas.DataFrame):
                 ``turbines`` table.
""" - X = target_times[['turbine_id', 'cutoff_time']] - y = target_times['target'] - self._pipeline.fit(X, y, readings=readings, turbines=turbines) - self.fitted = True + if target_times is None: + X = kwargs.pop('X') + y = kwargs.pop('y') + else: + X = target_times[['turbine_id', 'cutoff_time']] + y = target_times['target'] + + out = self._pipeline.fit(X, y, readings=readings, turbines=turbines, + start_=start_, output_=output_, **kwargs) + + if output_ is None: + self.fitted = True + + return out - def predict(self, target_times, readings, turbines=None): + def predict(self, target_times=None, readings=None, turbines=None, + start_=None, output_='default', **kwargs): """Make predictions using this pipeline. Args: @@ -564,7 +576,8 @@ def predict(self, target_times, readings, turbines=None): raise NotFittedError() X = target_times[['turbine_id', 'cutoff_time']] - return self._pipeline.predict(X, readings=readings, turbines=turbines) + return self._pipeline.predict(X, readings=readings, turbines=turbines, + start_=start_, output_=output_, **kwargs) def save(self, path): """Serialize and save this pipeline using cloudpickle. diff --git a/greenguard/pipelines/normalize_dfs_xgb_classifier.json b/greenguard/pipelines/normalize_dfs_xgb_classifier.json index 3d7d4d2..8039d12 100644 --- a/greenguard/pipelines/normalize_dfs_xgb_classifier.json +++ b/greenguard/pipelines/normalize_dfs_xgb_classifier.json @@ -44,7 +44,7 @@ "encode": false, "max_depth": -1, "copy": true, - "verbose": true, + "verbose": false, "n_jobs": 1, "training_window": "1d" } diff --git a/tutorials/pipelines/normalize_dfs_xgb_classifier.ipynb b/tutorials/pipelines/normalize_dfs_xgb_classifier.ipynb new file mode 100644 index 0000000..5bcb1ea --- /dev/null +++ b/tutorials/pipelines/normalize_dfs_xgb_classifier.ipynb @@ -0,0 +1,1697 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# normalize_dfs_xgb_classifier" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + } + ], + "source": [ + "from greenguard.demo import load_demo\n", + "\n", + "target_times, readings = load_demo()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_name = 'normalize_dfs_xgb_classifier'" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from greenguard.pipeline import GreenGuardPipeline\n", + "\n", + "pipeline = GreenGuardPipeline(pipeline_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['pandas.DataFrame.resample',\n", + " 'featuretools.EntitySet.entity_from_dataframe',\n", + " 'featuretools.EntitySet.normalize_entity',\n", + " 'featuretools.EntitySet.normalize_entity',\n", + " 'featuretools.dfs',\n", + " 'mlprimitives.custom.feature_extraction.CategoricalEncoder',\n", + " 'xgboost.XGBClassifier']" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipeline.template['primitives']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Step by Step execution" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Input Data" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idsignal_idtimestampvalue
0T001S012013-01-10323.0
1T001S022013-01-10320.0
2T001S032013-01-10284.0
3T001S042013-01-10348.0
4T001S052013-01-10273.0
\n", + "
" + ], + "text/plain": [ + " turbine_id signal_id timestamp value\n", + "0 T001 S01 2013-01-10 323.0\n", + "1 T001 S02 2013-01-10 320.0\n", + "2 T001 S03 2013-01-10 284.0\n", + "3 T001 S04 2013-01-10 348.0\n", + "4 T001 S05 2013-01-10 273.0" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "readings.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idcutoff_timetarget
0T0012013-01-120
1T0012013-01-130
2T0012013-01-140
3T0012013-01-151
4T0012013-01-160
\n", + "
" + ], + "text/plain": [ + " turbine_id cutoff_time target\n", + "0 T001 2013-01-12 0\n", + "1 T001 2013-01-13 0\n", + "2 T001 2013-01-14 0\n", + "3 T001 2013-01-15 1\n", + "4 T001 2013-01-16 0" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "target_times.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data Preparation (part of GreenGuard Pipeline)\n", + "\n", + "* Input: target_times, readings, turbines\n", + "* Output: X, y, readings, turbines\n", + "* Effect: target_times has been split into X and y" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## pandas.DataFrame.resample\n", + "\n", + "* Input: readings\n", + "* Output: readings (resampled)\n", + "* Effect: readings have been resampled to the indicated resample rule and turbine_id,\n", + " signal_id and timestamp have been set as a multi-index" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "step = 0\n", + "context = pipeline.fit(target_times, readings, output_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'X', 'y'])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idsignal_idtimestampvalue
0T001S012013-01-10 00:00:00323.0
1T001S012013-01-10 00:10:00346.0
2T001S012013-01-10 00:20:00407.0
3T001S012013-01-10 00:30:00257.0
4T001S012013-01-10 00:40:00267.0
\n", + "
" + ], + "text/plain": [ + " turbine_id signal_id timestamp value\n", + "0 T001 S01 2013-01-10 00:00:00 323.0\n", + "1 T001 S01 2013-01-10 00:10:00 346.0\n", + "2 T001 S01 2013-01-10 00:20:00 407.0\n", + "3 T001 S01 2013-01-10 00:30:00 257.0\n", + "4 T001 S01 2013-01-10 00:40:00 267.0" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## featuretools.EntitySet.entity_from_dataframe\n", + "\n", + "* Input: readings (resampled)\n", + "* Output: entityset\n", + "* Effect: Entityset has been generated from readings" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "step = 1\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'X', 'y', 'entityset'])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Entityset: entityset\n", + " Entities:\n", + " readings [Rows: 1329146, Columns: 5]\n", + " Relationships:\n", + " No relationships" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['entityset']" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idsignal_idtimestampvalue
0T001S012013-01-10 00:00:00323.0
1T001S012013-01-10 00:10:00346.0
2T001S012013-01-10 00:20:00407.0
3T001S012013-01-10 00:30:00257.0
4T001S012013-01-10 00:40:00267.0
\n", + "
" + ], + "text/plain": [ + " turbine_id signal_id timestamp value\n", + "0 T001 S01 2013-01-10 00:00:00 323.0\n", + "1 T001 S01 2013-01-10 00:10:00 346.0\n", + "2 T001 S01 2013-01-10 00:20:00 407.0\n", + "3 T001 S01 2013-01-10 00:30:00 257.0\n", + "4 T001 S01 2013-01-10 00:40:00 267.0" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## featuretools.EntitySet.normalize_entity\n", + "\n", + "* Input: entityset\n", + "* Output: entityset with relationship (readings.turbine_id with turbines.turbine_id)\n", + "* Effect: establish relation between readings and turbines" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "step = 2\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'entityset', 'X', 'y'])" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Entityset: entityset\n", + " Entities:\n", + " readings [Rows: 1329146, Columns: 5]\n", + " turbines [Rows: 1, Columns: 1]\n", + " Relationships:\n", + " readings.turbine_id -> turbines.turbine_id" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['entityset']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## featuretools.EntitySet.normalize_entity\n", + "\n", + "* Input: entityset\n", + "* Output: entityset with relationship (readings.signal_id with signals.signal_id)\n", + "* Effect: establish relationship between readings and signals" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "step = 3\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'entityset', 'X', 'y'])" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Entityset: entityset\n", + " Entities:\n", + " readings [Rows: 1329146, Columns: 5]\n", + " turbines [Rows: 1, Columns: 1]\n", + " signals [Rows: 26, Columns: 1]\n", + " Relationships:\n", + " readings.turbine_id -> turbines.turbine_id\n", + " readings.signal_id -> signals.signal_id" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['entityset']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## featuretools.dfs\n", + "\n", + "* Input: entityset (unstacked, no turbine_id, no timestamp)\n", + "* Output: X (has additional features)\n", + "* Effect: build features for relational dataset using DFS" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "step = 4\n", + "context = pipeline.fit(**context, output_=step, start_=step)" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'entityset', 'X', 'y'])" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SUM(readings.value)STD(readings.value)MAX(readings.value)SKEW(readings.value)MIN(readings.value)MEAN(readings.value)COUNT(readings)NUM_UNIQUE(readings.signal_id)MODE(readings.signal_id)NUM_UNIQUE(readings.DAY(timestamp))...MEAN(readings.signals.NUM_UNIQUE(readings.MONTH(timestamp)))MEAN(readings.signals.NUM_UNIQUE(readings.WEEKDAY(timestamp)))NUM_UNIQUE(readings.signals.MODE(readings.MONTH(timestamp)))NUM_UNIQUE(readings.signals.MODE(readings.DAY(timestamp)))NUM_UNIQUE(readings.signals.MODE(readings.YEAR(timestamp)))NUM_UNIQUE(readings.signals.MODE(readings.WEEKDAY(timestamp)))MODE(readings.signals.MODE(readings.MONTH(timestamp)))MODE(readings.signals.MODE(readings.DAY(timestamp)))MODE(readings.signals.MODE(readings.YEAR(timestamp)))MODE(readings.signals.MODE(readings.WEEKDAY(timestamp)))
turbine_id
T0013.457475e+091.456852e+063448719.01.0192120.0917102.224456377026S012...12111111120134
T0013.465358e+091.459852e+063453777.01.0187600.0919193.186021377026S012...12111111220135
T0013.479406e+091.465252e+063463880.01.0181922.7922919.430027377026S012...12111111320136
T0013.499427e+091.473308e+063474703.01.017664-1.0928229.883899377026S012...12111111420130
T0012.912289e+091.477955e+063485019.01.0318790.0924242.895144377026S012...12111111520131
\n", + "

5 rows × 99 columns

\n", + "
" + ], + "text/plain": [ + " SUM(readings.value) STD(readings.value) MAX(readings.value) \\\n", + "turbine_id \n", + "T001 3.457475e+09 1.456852e+06 3448719.0 \n", + "T001 3.465358e+09 1.459852e+06 3453777.0 \n", + "T001 3.479406e+09 1.465252e+06 3463880.0 \n", + "T001 3.499427e+09 1.473308e+06 3474703.0 \n", + "T001 2.912289e+09 1.477955e+06 3485019.0 \n", + "\n", + " SKEW(readings.value) MIN(readings.value) MEAN(readings.value) \\\n", + "turbine_id \n", + "T001 1.019212 0.0 917102.224456 \n", + "T001 1.018760 0.0 919193.186021 \n", + "T001 1.018192 2.7 922919.430027 \n", + "T001 1.017664 -1.0 928229.883899 \n", + "T001 1.031879 0.0 924242.895144 \n", + "\n", + " COUNT(readings) NUM_UNIQUE(readings.signal_id) \\\n", + "turbine_id \n", + "T001 3770 26 \n", + "T001 3770 26 \n", + "T001 3770 26 \n", + "T001 3770 26 \n", + "T001 3770 26 \n", + "\n", + " MODE(readings.signal_id) NUM_UNIQUE(readings.DAY(timestamp)) ... \\\n", + "turbine_id ... \n", + "T001 S01 2 ... \n", + "T001 S01 2 ... \n", + "T001 S01 2 ... \n", + "T001 S01 2 ... \n", + "T001 S01 2 ... \n", + "\n", + " MEAN(readings.signals.NUM_UNIQUE(readings.MONTH(timestamp))) \\\n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "\n", + " MEAN(readings.signals.NUM_UNIQUE(readings.WEEKDAY(timestamp))) \\\n", + "turbine_id \n", + "T001 2 \n", + "T001 2 \n", + "T001 2 \n", + "T001 2 \n", + "T001 2 \n", + "\n", + " NUM_UNIQUE(readings.signals.MODE(readings.MONTH(timestamp))) \\\n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "\n", + " NUM_UNIQUE(readings.signals.MODE(readings.DAY(timestamp))) \\\n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "\n", + " NUM_UNIQUE(readings.signals.MODE(readings.YEAR(timestamp))) \\\n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "\n", + " NUM_UNIQUE(readings.signals.MODE(readings.WEEKDAY(timestamp))) \\\n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "\n", + " MODE(readings.signals.MODE(readings.MONTH(timestamp))) \\\n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "\n", + " MODE(readings.signals.MODE(readings.DAY(timestamp))) \\\n", + "turbine_id \n", + "T001 11 \n", + "T001 12 \n", + "T001 13 \n", + "T001 14 \n", + "T001 15 \n", + "\n", + " MODE(readings.signals.MODE(readings.YEAR(timestamp))) \\\n", + "turbine_id \n", + "T001 2013 \n", + "T001 2013 \n", + "T001 2013 \n", + "T001 2013 \n", + "T001 2013 \n", + "\n", + " MODE(readings.signals.MODE(readings.WEEKDAY(timestamp))) \n", + "turbine_id \n", + "T001 4 \n", + "T001 5 \n", + "T001 6 \n", + "T001 0 \n", + "T001 1 \n", + "\n", + "[5 rows x 99 columns]" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['X'].head()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "99" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# features generated (the turbine_id is set as index).\n", + "len(context['X'].columns)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idsignal_idtimestampvalue
0T001S012013-01-10 00:00:00323.0
1T001S012013-01-10 00:10:00346.0
2T001S012013-01-10 00:20:00407.0
3T001S012013-01-10 00:30:00257.0
4T001S012013-01-10 00:40:00267.0
\n", + "
" + ], + "text/plain": [ + " turbine_id signal_id timestamp value\n", + "0 T001 S01 2013-01-10 00:00:00 323.0\n", + "1 T001 S01 2013-01-10 00:10:00 346.0\n", + "2 T001 S01 2013-01-10 00:20:00 407.0\n", + "3 T001 S01 2013-01-10 00:30:00 257.0\n", + "4 T001 S01 2013-01-10 00:40:00 267.0" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## mlprimitives.custom.feature_extraction.CategoricalEncoder\n", + "\n", + "* Input: X\n", + "* Output: X (label encoded)\n", + "* Effect: encodes categorical features using OneHotLabelEncoder" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [], + "source": [ + "step = 5\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'entityset', 'X', 'y'])" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SUM(readings.value)STD(readings.value)MAX(readings.value)SKEW(readings.value)MIN(readings.value)MEAN(readings.value)COUNT(readings)NUM_UNIQUE(readings.signal_id)NUM_UNIQUE(readings.DAY(timestamp))NUM_UNIQUE(readings.MONTH(timestamp))...NUM_UNIQUE(readings.signals.MODE(readings.MONTH(timestamp)))NUM_UNIQUE(readings.signals.MODE(readings.DAY(timestamp)))NUM_UNIQUE(readings.signals.MODE(readings.YEAR(timestamp)))NUM_UNIQUE(readings.signals.MODE(readings.WEEKDAY(timestamp)))MODE(readings.signals.MODE(readings.MONTH(timestamp)))MODE(readings.signals.MODE(readings.DAY(timestamp)))MODE(readings.signals.MODE(readings.YEAR(timestamp)))MODE(readings.signals.MODE(readings.WEEKDAY(timestamp)))MODE(readings.signal_id)=S01MODE(readings.signals.MODE(readings.turbine_id))=T001
turbine_id
T0013.457475e+091.456852e+063448719.01.0192120.0917102.22445637702621...11111112013411
T0013.465358e+091.459852e+063453777.01.0187600.0919193.18602137702621...11111122013511
T0013.479406e+091.465252e+063463880.01.0181922.7922919.43002737702621...11111132013611
T0013.499427e+091.473308e+063474703.01.017664-1.0928229.88389937702621...11111142013011
T0012.912289e+091.477955e+063485019.01.0318790.0924242.89514437702621...11111152013111
\n", + "

5 rows × 99 columns

\n", + "
" + ], + "text/plain": [ + " SUM(readings.value) STD(readings.value) MAX(readings.value) \\\n", + "turbine_id \n", + "T001 3.457475e+09 1.456852e+06 3448719.0 \n", + "T001 3.465358e+09 1.459852e+06 3453777.0 \n", + "T001 3.479406e+09 1.465252e+06 3463880.0 \n", + "T001 3.499427e+09 1.473308e+06 3474703.0 \n", + "T001 2.912289e+09 1.477955e+06 3485019.0 \n", + "\n", + " SKEW(readings.value) MIN(readings.value) MEAN(readings.value) \\\n", + "turbine_id \n", + "T001 1.019212 0.0 917102.224456 \n", + "T001 1.018760 0.0 919193.186021 \n", + "T001 1.018192 2.7 922919.430027 \n", + "T001 1.017664 -1.0 928229.883899 \n", + "T001 1.031879 0.0 924242.895144 \n", + "\n", + " COUNT(readings) NUM_UNIQUE(readings.signal_id) \\\n", + "turbine_id \n", + "T001 3770 26 \n", + "T001 3770 26 \n", + "T001 3770 26 \n", + "T001 3770 26 \n", + "T001 3770 26 \n", + "\n", + " NUM_UNIQUE(readings.DAY(timestamp)) \\\n", + "turbine_id \n", + "T001 2 \n", + "T001 2 \n", + "T001 2 \n", + "T001 2 \n", + "T001 2 \n", + "\n", + " NUM_UNIQUE(readings.MONTH(timestamp)) ... \\\n", + "turbine_id ... \n", + "T001 1 ... \n", + "T001 1 ... \n", + "T001 1 ... \n", + "T001 1 ... \n", + "T001 1 ... \n", + "\n", + " NUM_UNIQUE(readings.signals.MODE(readings.MONTH(timestamp))) \\\n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "\n", + " NUM_UNIQUE(readings.signals.MODE(readings.DAY(timestamp))) \\\n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "\n", + " NUM_UNIQUE(readings.signals.MODE(readings.YEAR(timestamp))) \\\n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "\n", + " NUM_UNIQUE(readings.signals.MODE(readings.WEEKDAY(timestamp))) \\\n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "\n", + " MODE(readings.signals.MODE(readings.MONTH(timestamp))) \\\n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "\n", + " MODE(readings.signals.MODE(readings.DAY(timestamp))) \\\n", + "turbine_id \n", + "T001 11 \n", + "T001 12 \n", + "T001 13 \n", + "T001 14 \n", + "T001 15 \n", + "\n", + " MODE(readings.signals.MODE(readings.YEAR(timestamp))) \\\n", + "turbine_id \n", + "T001 2013 \n", + "T001 2013 \n", + "T001 2013 \n", + "T001 2013 \n", + "T001 2013 \n", + "\n", + " MODE(readings.signals.MODE(readings.WEEKDAY(timestamp))) \\\n", + "turbine_id \n", + "T001 4 \n", + "T001 5 \n", + "T001 6 \n", + "T001 0 \n", + "T001 1 \n", + "\n", + " MODE(readings.signal_id)=S01 \\\n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "\n", + " MODE(readings.signals.MODE(readings.turbine_id))=T001 \n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "\n", + "[5 rows x 99 columns]" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['X'].head()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idsignal_idtimestampvalue
0T001S012013-01-10 00:00:00323.0
1T001S012013-01-10 00:10:00346.0
2T001S012013-01-10 00:20:00407.0
3T001S012013-01-10 00:30:00257.0
4T001S012013-01-10 00:40:00267.0
\n", + "
" + ], + "text/plain": [ + " turbine_id signal_id timestamp value\n", + "0 T001 S01 2013-01-10 00:00:00 323.0\n", + "1 T001 S01 2013-01-10 00:10:00 346.0\n", + "2 T001 S01 2013-01-10 00:20:00 407.0\n", + "3 T001 S01 2013-01-10 00:30:00 257.0\n", + "4 T001 S01 2013-01-10 00:40:00 267.0" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## xgboost.XGBClassifier\n", + "\n", + "* Input: X (label encoded and featurized)\n", + "* Output: None\n", + "* Effect: trained model" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "step = 6\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'entityset', 'X', 'y'])" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorials/pipelines/unstack_double_lstm_timeseries_classifier.ipynb b/tutorials/pipelines/unstack_double_lstm_timeseries_classifier.ipynb new file mode 100644 index 0000000..5c7b442 --- /dev/null +++ b/tutorials/pipelines/unstack_double_lstm_timeseries_classifier.ipynb @@ -0,0 +1,2501 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# unstack_double_lstm_timeseries_classifier" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + } + ], + "source": [ + "from greenguard.demo import load_demo\n", + "\n", + "target_times, readings = load_demo()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_name = 'unstack_double_lstm_timeseries_classifier'" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from greenguard.pipeline import GreenGuardPipeline\n", + "\n", + "pipeline = GreenGuardPipeline(pipeline_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['pandas.DataFrame.resample',\n", + " 'pandas.DataFrame.unstack',\n", + " 'pandas.DataFrame.pop',\n", + " 'pandas.DataFrame.pop',\n", + " 'sklearn.impute.SimpleImputer',\n", + " 'sklearn.preprocessing.MinMaxScaler',\n", + " 'pandas.DataFrame',\n", + " 'pandas.DataFrame.set',\n", + " 'pandas.DataFrame.set',\n", + " 'mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences',\n", + " 'keras.Sequential.DoubleLSTMTimeSeriesClassifier']" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipeline.template['primitives']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Step by Step execution" + ] + }, + { + 
"cell_type": "markdown", + "metadata": {}, + "source": [ + "## Input Data" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idsignal_idtimestampvalue
0T001S012013-01-10323.0
1T001S022013-01-10320.0
2T001S032013-01-10284.0
3T001S042013-01-10348.0
4T001S052013-01-10273.0
\n", + "
" + ], + "text/plain": [ + " turbine_id signal_id timestamp value\n", + "0 T001 S01 2013-01-10 323.0\n", + "1 T001 S02 2013-01-10 320.0\n", + "2 T001 S03 2013-01-10 284.0\n", + "3 T001 S04 2013-01-10 348.0\n", + "4 T001 S05 2013-01-10 273.0" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "readings.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idcutoff_timetarget
0T0012013-01-120
1T0012013-01-130
2T0012013-01-140
3T0012013-01-151
4T0012013-01-160
\n", + "
" + ], + "text/plain": [ + " turbine_id cutoff_time target\n", + "0 T001 2013-01-12 0\n", + "1 T001 2013-01-13 0\n", + "2 T001 2013-01-14 0\n", + "3 T001 2013-01-15 1\n", + "4 T001 2013-01-16 0" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "target_times.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data Preparation (part of GreenGuard Pipeline)\n", + "\n", + "* Input: target_times, readings, turbines\n", + "* Output: X, y, readings, turbines\n", + "* Effect: target_times has been split into X and y" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## pandas.DataFrame.resample\n", + "\n", + "* Input: readings\n", + "* Output: readings (resampled)\n", + "* Effect: readings have been resampled to the indicated resample rule and turbine_id,\n", + " signal_id and timestamp have been set as a multi-index" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "context = pipeline.fit(target_times, readings, output_=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'X', 'y'])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
value
turbine_idsignal_idtimestamp
T001S012013-01-10 00:00:00313.333333
2013-01-10 01:00:00197.500000
2013-01-10 02:00:00248.166667
2013-01-10 03:00:00253.166667
2013-01-10 04:00:00305.000000
\n", + "
" + ], + "text/plain": [ + " value\n", + "turbine_id signal_id timestamp \n", + "T001 S01 2013-01-10 00:00:00 313.333333\n", + " 2013-01-10 01:00:00 197.500000\n", + " 2013-01-10 02:00:00 248.166667\n", + " 2013-01-10 03:00:00 253.166667\n", + " 2013-01-10 04:00:00 305.000000" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## pandas.DataFrame.unstack\n", + "\n", + "* Input: readings (resampled)\n", + "* Output: readings (unstacked)\n", + "* Effect: readings have been unstacked" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "step = 1\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'X', 'y'])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idtimestampvalue_S01value_S02value_S03value_S04value_S05value_S06value_S07value_S08...value_S17value_S18value_S19value_S20value_S21value_S22value_S23value_S24value_S25value_S26
0T0012013-01-10 00:00:00313.333333323.833333336.000000364.666667286.500000314.000000243.1666673.197980e+06...10.3833333.131958e+0652.66666754.33333356.16666761.00000047.66666752.66666740.833333357.333333
1T0012013-01-10 01:00:00197.500000221.333333216.000000260.666667206.833333235.833333186.6666673.198221e+06...8.6666673.133668e+0633.16666737.00000036.16666743.66666734.50000039.33333331.166667249.666667
2T0012013-01-10 02:00:00248.166667271.666667277.500000298.000000233.666667271.166667216.3333333.198448e+06...8.8333333.135413e+0641.50000045.66666746.50000049.66666739.33333345.50000036.166667297.666667
3T0012013-01-10 03:00:00253.166667256.166667242.666667265.333333211.666667226.666667181.0000003.198691e+06...8.4333333.137001e+0642.33333342.83333340.50000044.16666735.33333337.83333330.333333268.000000
4T0012013-01-10 04:00:00305.000000312.333333346.166667329.833333280.666667308.833333271.8333333.198978e+06...9.0833333.138843e+0650.50000051.16666755.50000053.66666746.16666749.66666741.166667341.833333
\n", + "

5 rows × 28 columns

\n", + "
" + ], + "text/plain": [ + " turbine_id timestamp value_S01 value_S02 value_S03 \\\n", + "0 T001 2013-01-10 00:00:00 313.333333 323.833333 336.000000 \n", + "1 T001 2013-01-10 01:00:00 197.500000 221.333333 216.000000 \n", + "2 T001 2013-01-10 02:00:00 248.166667 271.666667 277.500000 \n", + "3 T001 2013-01-10 03:00:00 253.166667 256.166667 242.666667 \n", + "4 T001 2013-01-10 04:00:00 305.000000 312.333333 346.166667 \n", + "\n", + " value_S04 value_S05 value_S06 value_S07 value_S08 ... \\\n", + "0 364.666667 286.500000 314.000000 243.166667 3.197980e+06 ... \n", + "1 260.666667 206.833333 235.833333 186.666667 3.198221e+06 ... \n", + "2 298.000000 233.666667 271.166667 216.333333 3.198448e+06 ... \n", + "3 265.333333 211.666667 226.666667 181.000000 3.198691e+06 ... \n", + "4 329.833333 280.666667 308.833333 271.833333 3.198978e+06 ... \n", + "\n", + " value_S17 value_S18 value_S19 value_S20 value_S21 value_S22 \\\n", + "0 10.383333 3.131958e+06 52.666667 54.333333 56.166667 61.000000 \n", + "1 8.666667 3.133668e+06 33.166667 37.000000 36.166667 43.666667 \n", + "2 8.833333 3.135413e+06 41.500000 45.666667 46.500000 49.666667 \n", + "3 8.433333 3.137001e+06 42.333333 42.833333 40.500000 44.166667 \n", + "4 9.083333 3.138843e+06 50.500000 51.166667 55.500000 53.666667 \n", + "\n", + " value_S23 value_S24 value_S25 value_S26 \n", + "0 47.666667 52.666667 40.833333 357.333333 \n", + "1 34.500000 39.333333 31.166667 249.666667 \n", + "2 39.333333 45.500000 36.166667 297.666667 \n", + "3 35.333333 37.833333 30.333333 268.000000 \n", + "4 46.166667 49.666667 41.166667 341.833333 \n", + "\n", + "[5 rows x 28 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## pandas.DataFrame.pop\n", + "\n", + "* Input: readings (unstacked)\n", + "* Output: readings (without turbine_id), turbine_id\n", + "* Effect: turbine_id has been popped from readings" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "step = 2\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'X', 'y', 'turbine_id'])" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 T001\n", + "1 T001\n", + "2 T001\n", + "3 T001\n", + "4 T001\n", + "Name: turbine_id, dtype: object" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['turbine_id'].head()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timestampvalue_S01value_S02value_S03value_S04value_S05value_S06value_S07value_S08value_S09...value_S17value_S18value_S19value_S20value_S21value_S22value_S23value_S24value_S25value_S26
02013-01-10 00:00:00313.333333323.833333336.000000364.666667286.500000314.000000243.1666673.197980e+06695143.166667...10.3833333.131958e+0652.66666754.33333356.16666761.00000047.66666752.66666740.833333357.333333
12013-01-10 01:00:00197.500000221.333333216.000000260.666667206.833333235.833333186.6666673.198221e+06695403.666667...8.6666673.133668e+0633.16666737.00000036.16666743.66666734.50000039.33333331.166667249.666667
22013-01-10 02:00:00248.166667271.666667277.500000298.000000233.666667271.166667216.3333333.198448e+06695656.500000...8.8333333.135413e+0641.50000045.66666746.50000049.66666739.33333345.50000036.166667297.666667
32013-01-10 03:00:00253.166667256.166667242.666667265.333333211.666667226.666667181.0000003.198691e+06695911.333333...8.4333333.137001e+0642.33333342.83333340.50000044.16666735.33333337.83333330.333333268.000000
42013-01-10 04:00:00305.000000312.333333346.166667329.833333280.666667308.833333271.8333333.198978e+06696195.833333...9.0833333.138843e+0650.50000051.16666755.50000053.66666746.16666749.66666741.166667341.833333
\n", + "

5 rows × 27 columns

\n", + "
" + ], + "text/plain": [ + " timestamp value_S01 value_S02 value_S03 value_S04 \\\n", + "0 2013-01-10 00:00:00 313.333333 323.833333 336.000000 364.666667 \n", + "1 2013-01-10 01:00:00 197.500000 221.333333 216.000000 260.666667 \n", + "2 2013-01-10 02:00:00 248.166667 271.666667 277.500000 298.000000 \n", + "3 2013-01-10 03:00:00 253.166667 256.166667 242.666667 265.333333 \n", + "4 2013-01-10 04:00:00 305.000000 312.333333 346.166667 329.833333 \n", + "\n", + " value_S05 value_S06 value_S07 value_S08 value_S09 ... \\\n", + "0 286.500000 314.000000 243.166667 3.197980e+06 695143.166667 ... \n", + "1 206.833333 235.833333 186.666667 3.198221e+06 695403.666667 ... \n", + "2 233.666667 271.166667 216.333333 3.198448e+06 695656.500000 ... \n", + "3 211.666667 226.666667 181.000000 3.198691e+06 695911.333333 ... \n", + "4 280.666667 308.833333 271.833333 3.198978e+06 696195.833333 ... \n", + "\n", + " value_S17 value_S18 value_S19 value_S20 value_S21 value_S22 \\\n", + "0 10.383333 3.131958e+06 52.666667 54.333333 56.166667 61.000000 \n", + "1 8.666667 3.133668e+06 33.166667 37.000000 36.166667 43.666667 \n", + "2 8.833333 3.135413e+06 41.500000 45.666667 46.500000 49.666667 \n", + "3 8.433333 3.137001e+06 42.333333 42.833333 40.500000 44.166667 \n", + "4 9.083333 3.138843e+06 50.500000 51.166667 55.500000 53.666667 \n", + "\n", + " value_S23 value_S24 value_S25 value_S26 \n", + "0 47.666667 52.666667 40.833333 357.333333 \n", + "1 34.500000 39.333333 31.166667 249.666667 \n", + "2 39.333333 45.500000 36.166667 297.666667 \n", + "3 35.333333 37.833333 30.333333 268.000000 \n", + "4 46.166667 49.666667 41.166667 341.833333 \n", + "\n", + "[5 rows x 27 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## pandas.DataFrame.pop\n", + "\n", + "* Input: readings (without turbine_id)\n", + "* Output: readings (without timestamp), timestamp\n", + "* Effect: timestamp has been popped from readings" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "step = 3\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'X', 'y', 'timestamp'])" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2013-01-10 00:00:00\n", + "1 2013-01-10 01:00:00\n", + "2 2013-01-10 02:00:00\n", + "3 2013-01-10 03:00:00\n", + "4 2013-01-10 04:00:00\n", + "Name: timestamp, dtype: datetime64[ns]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['timestamp'].head()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
value_S01value_S02value_S03value_S04value_S05value_S06value_S07value_S08value_S09value_S10...value_S17value_S18value_S19value_S20value_S21value_S22value_S23value_S24value_S25value_S26
0313.333333323.833333336.000000364.666667286.500000314.000000243.1666673.197980e+06695143.1666673.348384e+06...10.3833333.131958e+0652.66666754.33333356.16666761.00000047.66666752.66666740.833333357.333333
1197.500000221.333333216.000000260.666667206.833333235.833333186.6666673.198221e+06695403.6666673.348651e+06...8.6666673.133668e+0633.16666737.00000036.16666743.66666734.50000039.33333331.166667249.666667
2248.166667271.666667277.500000298.000000233.666667271.166667216.3333333.198448e+06695656.5000003.348910e+06...8.8333333.135413e+0641.50000045.66666746.50000049.66666739.33333345.50000036.166667297.666667
3253.166667256.166667242.666667265.333333211.666667226.666667181.0000003.198691e+06695911.3333333.349157e+06...8.4333333.137001e+0642.33333342.83333340.50000044.16666735.33333337.83333330.333333268.000000
4305.000000312.333333346.166667329.833333280.666667308.833333271.8333333.198978e+06696195.8333333.349452e+06...9.0833333.138843e+0650.50000051.16666755.50000053.66666746.16666749.66666741.166667341.833333
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " value_S01 value_S02 value_S03 value_S04 value_S05 value_S06 \\\n", + "0 313.333333 323.833333 336.000000 364.666667 286.500000 314.000000 \n", + "1 197.500000 221.333333 216.000000 260.666667 206.833333 235.833333 \n", + "2 248.166667 271.666667 277.500000 298.000000 233.666667 271.166667 \n", + "3 253.166667 256.166667 242.666667 265.333333 211.666667 226.666667 \n", + "4 305.000000 312.333333 346.166667 329.833333 280.666667 308.833333 \n", + "\n", + " value_S07 value_S08 value_S09 value_S10 ... value_S17 \\\n", + "0 243.166667 3.197980e+06 695143.166667 3.348384e+06 ... 10.383333 \n", + "1 186.666667 3.198221e+06 695403.666667 3.348651e+06 ... 8.666667 \n", + "2 216.333333 3.198448e+06 695656.500000 3.348910e+06 ... 8.833333 \n", + "3 181.000000 3.198691e+06 695911.333333 3.349157e+06 ... 8.433333 \n", + "4 271.833333 3.198978e+06 696195.833333 3.349452e+06 ... 9.083333 \n", + "\n", + " value_S18 value_S19 value_S20 value_S21 value_S22 value_S23 \\\n", + "0 3.131958e+06 52.666667 54.333333 56.166667 61.000000 47.666667 \n", + "1 3.133668e+06 33.166667 37.000000 36.166667 43.666667 34.500000 \n", + "2 3.135413e+06 41.500000 45.666667 46.500000 49.666667 39.333333 \n", + "3 3.137001e+06 42.333333 42.833333 40.500000 44.166667 35.333333 \n", + "4 3.138843e+06 50.500000 51.166667 55.500000 53.666667 46.166667 \n", + "\n", + " value_S24 value_S25 value_S26 \n", + "0 52.666667 40.833333 357.333333 \n", + "1 39.333333 31.166667 249.666667 \n", + "2 45.500000 36.166667 297.666667 \n", + "3 37.833333 30.333333 268.000000 \n", + "4 49.666667 41.166667 341.833333 \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## sklearn.impute.SimpleImputer\n", + "\n", + "* Input: readings (unstacked, no turbine_id, no timestamp)\n", + "* Output: readings (imputed, numpy array)\n", + "* Effect: readings have been imputed and converted to numpy array" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "step = 4\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[3.13333333e+02, 3.23833333e+02, 3.36000000e+02, 3.64666667e+02,\n", + " 2.86500000e+02, 3.14000000e+02, 2.43166667e+02, 3.19798000e+06,\n", + " 6.95143167e+05, 3.34838383e+06, 3.43692150e+06, 3.32248667e+06,\n", + " 3.35809000e+06, 3.22390150e+06, 7.95000000e+00, 5.85000000e+00,\n", + " 1.03833333e+01, 3.13195833e+06, 5.26666667e+01, 5.43333333e+01,\n", + " 5.61666667e+01, 6.10000000e+01, 4.76666667e+01, 5.26666667e+01,\n", + " 4.08333333e+01, 3.57333333e+02],\n", + " [1.97500000e+02, 2.21333333e+02, 2.16000000e+02, 2.60666667e+02,\n", + " 2.06833333e+02, 2.35833333e+02, 1.86666667e+02, 3.19822067e+06,\n", + " 6.95403667e+05, 3.34865117e+06, 3.43722283e+06, 3.32272200e+06,\n", + " 3.35834000e+06, 3.22409567e+06, 6.83333333e+00, 5.15000000e+00,\n", + " 8.66666667e+00, 3.13366817e+06, 
3.31666667e+01, 3.70000000e+01,\n", + " 3.61666667e+01, 4.36666667e+01, 3.45000000e+01, 3.93333333e+01,\n", + " 3.11666667e+01, 2.49666667e+02],\n", + " [2.48166667e+02, 2.71666667e+02, 2.77500000e+02, 2.98000000e+02,\n", + " 2.33666667e+02, 2.71166667e+02, 2.16333333e+02, 3.19844767e+06,\n", + " 6.95656500e+05, 3.34890967e+06, 3.43751900e+06, 3.32295950e+06,\n", + " 3.35862067e+06, 3.22432333e+06, 7.11666667e+00, 5.56666667e+00,\n", + " 8.83333333e+00, 3.13541283e+06, 4.15000000e+01, 4.56666667e+01,\n", + " 4.65000000e+01, 4.96666667e+01, 3.93333333e+01, 4.55000000e+01,\n", + " 3.61666667e+01, 2.97666667e+02],\n", + " [2.53166667e+02, 2.56166667e+02, 2.42666667e+02, 2.65333333e+02,\n", + " 2.11666667e+02, 2.26666667e+02, 1.81000000e+02, 3.19869117e+06,\n", + " 6.95911333e+05, 3.34915717e+06, 3.43778050e+06, 3.32316850e+06,\n", + " 3.35884883e+06, 3.22450217e+06, 6.71666667e+00, 5.16666667e+00,\n", + " 8.43333333e+00, 3.13700133e+06, 4.23333333e+01, 4.28333333e+01,\n", + " 4.05000000e+01, 4.41666667e+01, 3.53333333e+01, 3.78333333e+01,\n", + " 3.03333333e+01, 2.68000000e+02],\n", + " [3.05000000e+02, 3.12333333e+02, 3.46166667e+02, 3.29833333e+02,\n", + " 2.80666667e+02, 3.08833333e+02, 2.71833333e+02, 3.19897850e+06,\n", + " 6.96195833e+05, 3.34945200e+06, 3.43807767e+06, 3.32340933e+06,\n", + " 3.35910983e+06, 3.22471400e+06, 7.20000000e+00, 5.28333333e+00,\n", + " 9.08333333e+00, 3.13884333e+06, 5.05000000e+01, 5.11666667e+01,\n", + " 5.55000000e+01, 5.36666667e+01, 4.61666667e+01, 4.96666667e+01,\n", + " 4.11666667e+01, 3.41833333e+02]])" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'][0:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## sklearn.preprocessing.MinMaxScaler\n", + "\n", + "* Input: (imputed, array)\n", + "* Output: readings (scaled, array)\n", + "* Effect: readings have been scaled to [-1, 1] range" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "step = 5\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[-0.26126126, -0.23706897, -0.20870076, -0.14106583, -0.32328767,\n", + " -0.25969448, -0.42198789, -1. , -1. , -1. ,\n", + " -1. , -1. , -1. , -1. 
, -0.11007463,\n", + " -0.16824645, -0.10424155, -0.37397741, -0.25233645, -0.22716628,\n", + " -0.20140515, -0.13481829, -0.32239156, -0.25380117, -0.4182243 ,\n", + " -0.25697453],\n", + " [-0.53349001, -0.47805643, -0.49088771, -0.38557994, -0.51037182,\n", + " -0.44339992, -0.55438391, -0.99983031, -0.99982547, -0.99982499,\n", + " -0.99980741, -0.9998428 , -0.99983779, -0.99986887, -0.23507463,\n", + " -0.26777251, -0.25233645, -0.37363511, -0.52570093, -0.470726 ,\n", + " -0.4824356 , -0.37866354, -0.50762016, -0.44093567, -0.55373832,\n", + " -0.48085254],\n", + " [-0.41441441, -0.35971787, -0.3462669 , -0.29780564, -0.44735812,\n", + " -0.36036036, -0.48486624, -0.99967026, -0.99965608, -0.99965576,\n", + " -0.99961813, -0.99968416, -0.99965569, -0.99971512, -0.20335821,\n", + " -0.20853081, -0.2379583 , -0.37328583, -0.4088785 , -0.34894614,\n", + " -0.33723653, -0.29425557, -0.43962485, -0.35438596, -0.48364486,\n", + " -0.38104315],\n", + " [-0.40266353, -0.39615987, -0.4281795 , -0.37460815, -0.49902153,\n", + " -0.4649432 , -0.56766257, -0.99949857, -0.99948535, -0.99949373,\n", + " -0.999451 , -0.99954455, -0.99950765, -0.99959435, -0.24813433,\n", + " -0.26540284, -0.27246585, -0.37296782, -0.39719626, -0.38875878,\n", + " -0.42154567, -0.37162954, -0.49589683, -0.4619883 , -0.56542056,\n", + " -0.4427309 ],\n", + " [-0.28084606, -0.26410658, -0.18479326, -0.22296238, -0.3369863 ,\n", + " -0.27183705, -0.35481351, -0.99929598, -0.99929474, -0.99930071,\n", + " -0.99926107, -0.99938368, -0.99933831, -0.9994513 , -0.19402985,\n", + " -0.24881517, -0.21639109, -0.37259906, -0.28271028, -0.27166276,\n", + " -0.21077283, -0.23798359, -0.34349355, -0.29590643, -0.4135514 ,\n", + " -0.28920464]])" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'][0:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## pandas.DataFrame\n", + "\n", + "* Input: readings (scaled, array)\n", + "* Output: readings (dataframe)\n", + "* Effect: readings have been converted into a dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "step = 6\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
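Steps 4 through 6 amount to a plain scikit-learn transform chain followed by a conversion back to pandas: impute missing values, scale everything into the [-1, 1] range, and wrap the resulting numpy array in a new DataFrame. A hedged sketch of the equivalent standalone code (the default `SimpleImputer` strategy and the `feature_range` are assumptions read off the outputs; the pipeline hyperparameters may differ):

```python
import numpy as np
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import MinMaxScaler

readings = pd.DataFrame({'value_S01': [313.3, np.nan, 248.2],
                         'value_S02': [323.8, 221.3, np.nan]})

imputed = SimpleImputer().fit_transform(readings)  # NaNs -> column means; output is a numpy array
scaled = MinMaxScaler(feature_range=(-1, 1)).fit_transform(imputed)  # map each column into [-1, 1]
readings = pd.DataFrame(scaled)  # back to a DataFrame; columns become integers 0..N-1
```

The integer column names produced by that last call are exactly what shows up in the next output.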
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...16171819202122232425
0-0.261261-0.237069-0.208701-0.141066-0.323288-0.259694-0.421988-1.000000-1.000000-1.000000...-0.104242-0.373977-0.252336-0.227166-0.201405-0.134818-0.322392-0.253801-0.418224-0.256975
1-0.533490-0.478056-0.490888-0.385580-0.510372-0.443400-0.554384-0.999830-0.999825-0.999825...-0.252336-0.373635-0.525701-0.470726-0.482436-0.378664-0.507620-0.440936-0.553738-0.480853
2-0.414414-0.359718-0.346267-0.297806-0.447358-0.360360-0.484866-0.999670-0.999656-0.999656...-0.237958-0.373286-0.408879-0.348946-0.337237-0.294256-0.439625-0.354386-0.483645-0.381043
3-0.402664-0.396160-0.428180-0.374608-0.499022-0.464943-0.567663-0.999499-0.999485-0.999494...-0.272466-0.372968-0.397196-0.388759-0.421546-0.371630-0.495897-0.461988-0.565421-0.442731
4-0.280846-0.264107-0.184793-0.222962-0.336986-0.271837-0.354814-0.999296-0.999295-0.999301...-0.216391-0.372599-0.282710-0.271663-0.210773-0.237984-0.343494-0.295906-0.413551-0.289205
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 \\\n", + "0 -0.261261 -0.237069 -0.208701 -0.141066 -0.323288 -0.259694 -0.421988 \n", + "1 -0.533490 -0.478056 -0.490888 -0.385580 -0.510372 -0.443400 -0.554384 \n", + "2 -0.414414 -0.359718 -0.346267 -0.297806 -0.447358 -0.360360 -0.484866 \n", + "3 -0.402664 -0.396160 -0.428180 -0.374608 -0.499022 -0.464943 -0.567663 \n", + "4 -0.280846 -0.264107 -0.184793 -0.222962 -0.336986 -0.271837 -0.354814 \n", + "\n", + " 7 8 9 ... 16 17 18 19 \\\n", + "0 -1.000000 -1.000000 -1.000000 ... -0.104242 -0.373977 -0.252336 -0.227166 \n", + "1 -0.999830 -0.999825 -0.999825 ... -0.252336 -0.373635 -0.525701 -0.470726 \n", + "2 -0.999670 -0.999656 -0.999656 ... -0.237958 -0.373286 -0.408879 -0.348946 \n", + "3 -0.999499 -0.999485 -0.999494 ... -0.272466 -0.372968 -0.397196 -0.388759 \n", + "4 -0.999296 -0.999295 -0.999301 ... -0.216391 -0.372599 -0.282710 -0.271663 \n", + "\n", + " 20 21 22 23 24 25 \n", + "0 -0.201405 -0.134818 -0.322392 -0.253801 -0.418224 -0.256975 \n", + "1 -0.482436 -0.378664 -0.507620 -0.440936 -0.553738 -0.480853 \n", + "2 -0.337237 -0.294256 -0.439625 -0.354386 -0.483645 -0.381043 \n", + "3 -0.421546 -0.371630 -0.495897 -0.461988 -0.565421 -0.442731 \n", + "4 -0.210773 -0.237984 -0.343494 -0.295906 -0.413551 -0.289205 \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## pandas.DataFrame.set\n", + "\n", + "* Input: readings (dataframe)\n", + "* Output: readings (dataframe with turbine_id)\n", + "* Effect: turbine_id has been set as a readings column" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "step = 7\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...171819202122232425turbine_id
0-0.261261-0.237069-0.208701-0.141066-0.323288-0.259694-0.421988-1.000000-1.000000-1.000000...-0.373977-0.252336-0.227166-0.201405-0.134818-0.322392-0.253801-0.418224-0.256975T001
1-0.533490-0.478056-0.490888-0.385580-0.510372-0.443400-0.554384-0.999830-0.999825-0.999825...-0.373635-0.525701-0.470726-0.482436-0.378664-0.507620-0.440936-0.553738-0.480853T001
2-0.414414-0.359718-0.346267-0.297806-0.447358-0.360360-0.484866-0.999670-0.999656-0.999656...-0.373286-0.408879-0.348946-0.337237-0.294256-0.439625-0.354386-0.483645-0.381043T001
3-0.402664-0.396160-0.428180-0.374608-0.499022-0.464943-0.567663-0.999499-0.999485-0.999494...-0.372968-0.397196-0.388759-0.421546-0.371630-0.495897-0.461988-0.565421-0.442731T001
4-0.280846-0.264107-0.184793-0.222962-0.336986-0.271837-0.354814-0.999296-0.999295-0.999301...-0.372599-0.282710-0.271663-0.210773-0.237984-0.343494-0.295906-0.413551-0.289205T001
\n", + "

5 rows × 27 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 \\\n", + "0 -0.261261 -0.237069 -0.208701 -0.141066 -0.323288 -0.259694 -0.421988 \n", + "1 -0.533490 -0.478056 -0.490888 -0.385580 -0.510372 -0.443400 -0.554384 \n", + "2 -0.414414 -0.359718 -0.346267 -0.297806 -0.447358 -0.360360 -0.484866 \n", + "3 -0.402664 -0.396160 -0.428180 -0.374608 -0.499022 -0.464943 -0.567663 \n", + "4 -0.280846 -0.264107 -0.184793 -0.222962 -0.336986 -0.271837 -0.354814 \n", + "\n", + " 7 8 9 ... 17 18 19 20 \\\n", + "0 -1.000000 -1.000000 -1.000000 ... -0.373977 -0.252336 -0.227166 -0.201405 \n", + "1 -0.999830 -0.999825 -0.999825 ... -0.373635 -0.525701 -0.470726 -0.482436 \n", + "2 -0.999670 -0.999656 -0.999656 ... -0.373286 -0.408879 -0.348946 -0.337237 \n", + "3 -0.999499 -0.999485 -0.999494 ... -0.372968 -0.397196 -0.388759 -0.421546 \n", + "4 -0.999296 -0.999295 -0.999301 ... -0.372599 -0.282710 -0.271663 -0.210773 \n", + "\n", + " 21 22 23 24 25 turbine_id \n", + "0 -0.134818 -0.322392 -0.253801 -0.418224 -0.256975 T001 \n", + "1 -0.378664 -0.507620 -0.440936 -0.553738 -0.480853 T001 \n", + "2 -0.294256 -0.439625 -0.354386 -0.483645 -0.381043 T001 \n", + "3 -0.371630 -0.495897 -0.461988 -0.565421 -0.442731 T001 \n", + "4 -0.237984 -0.343494 -0.295906 -0.413551 -0.289205 T001 \n", + "\n", + "[5 rows x 27 columns]" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## pandas.DataFrame.set\n", + "\n", + "* Input: readings (dataframe with turbine_id)\n", + "* Output: readings (dataframe with turbine_id and timestamp)\n", + "* Effect: timestamp has been set as a readings column" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "step = 8\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...1819202122232425turbine_idtimestamp
0-0.261261-0.237069-0.208701-0.141066-0.323288-0.259694-0.421988-1.000000-1.000000-1.000000...-0.252336-0.227166-0.201405-0.134818-0.322392-0.253801-0.418224-0.256975T0012013-01-10 00:00:00
1-0.533490-0.478056-0.490888-0.385580-0.510372-0.443400-0.554384-0.999830-0.999825-0.999825...-0.525701-0.470726-0.482436-0.378664-0.507620-0.440936-0.553738-0.480853T0012013-01-10 01:00:00
2-0.414414-0.359718-0.346267-0.297806-0.447358-0.360360-0.484866-0.999670-0.999656-0.999656...-0.408879-0.348946-0.337237-0.294256-0.439625-0.354386-0.483645-0.381043T0012013-01-10 02:00:00
3-0.402664-0.396160-0.428180-0.374608-0.499022-0.464943-0.567663-0.999499-0.999485-0.999494...-0.397196-0.388759-0.421546-0.371630-0.495897-0.461988-0.565421-0.442731T0012013-01-10 03:00:00
4-0.280846-0.264107-0.184793-0.222962-0.336986-0.271837-0.354814-0.999296-0.999295-0.999301...-0.282710-0.271663-0.210773-0.237984-0.343494-0.295906-0.413551-0.289205T0012013-01-10 04:00:00
\n", + "

5 rows × 28 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 \\\n", + "0 -0.261261 -0.237069 -0.208701 -0.141066 -0.323288 -0.259694 -0.421988 \n", + "1 -0.533490 -0.478056 -0.490888 -0.385580 -0.510372 -0.443400 -0.554384 \n", + "2 -0.414414 -0.359718 -0.346267 -0.297806 -0.447358 -0.360360 -0.484866 \n", + "3 -0.402664 -0.396160 -0.428180 -0.374608 -0.499022 -0.464943 -0.567663 \n", + "4 -0.280846 -0.264107 -0.184793 -0.222962 -0.336986 -0.271837 -0.354814 \n", + "\n", + " 7 8 9 ... 18 19 20 21 \\\n", + "0 -1.000000 -1.000000 -1.000000 ... -0.252336 -0.227166 -0.201405 -0.134818 \n", + "1 -0.999830 -0.999825 -0.999825 ... -0.525701 -0.470726 -0.482436 -0.378664 \n", + "2 -0.999670 -0.999656 -0.999656 ... -0.408879 -0.348946 -0.337237 -0.294256 \n", + "3 -0.999499 -0.999485 -0.999494 ... -0.397196 -0.388759 -0.421546 -0.371630 \n", + "4 -0.999296 -0.999295 -0.999301 ... -0.282710 -0.271663 -0.210773 -0.237984 \n", + "\n", + " 22 23 24 25 turbine_id timestamp \n", + "0 -0.322392 -0.253801 -0.418224 -0.256975 T001 2013-01-10 00:00:00 \n", + "1 -0.507620 -0.440936 -0.553738 -0.480853 T001 2013-01-10 01:00:00 \n", + "2 -0.439625 -0.354386 -0.483645 -0.381043 T001 2013-01-10 02:00:00 \n", + "3 -0.495897 -0.461988 -0.565421 -0.442731 T001 2013-01-10 03:00:00 \n", + "4 -0.343494 -0.295906 -0.413551 -0.289205 T001 2013-01-10 04:00:00 \n", + "\n", + "[5 rows x 28 columns]" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences\n", + "\n", + "* Input: X, readings (dataframe with turbine_id and timestamp)\n", + "* Output: X\n", + "* Effect: X has been converted to a 3d numpy array that contains 1 matrix of shape\n", + " (window_size x num_signals) for each one of the target times." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'window_size': 24, 'cutoff_time': 'cutoff_time', 'time_index': 'timestamp'}" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipeline._pipeline.get_hyperparameters()[\n", + " 'mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1']" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "step = 9\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(8521, 28)" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(353,)" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['y'].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(353, 24, 26)" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['X'].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[-0.58793576, -0.60305643, -0.63981971, -0.61481191, -0.69823875,\n", + " -0.65021543, -0.68912322, -0.99436914, -0.99439755, -0.99454249,\n", + " -0.99446788, -0.99476185, -0.99490997, -0.99529511, -0.34701493,\n", + " -0.33886256, -0.33860532, -0.36301186, -0.57943925, -0.59250585,\n", + " -0.6323185 , -0.60609613, -0.69284877, -0.64444444, -0.68691589,\n", + " -0.63853752],\n", + " [-0.56600078, -0.5846395 , -0.63002156, -0.61559561, -0.70880626,\n", + " -0.66392479, -0.69732474, -0.9942427 , -0.99427986, -0.9944408 ,\n", + " -0.99436498, -0.99468147, -0.99482011, -0.99521249, -0.33955224,\n", + " -0.31516588, -0.38892883, -0.36280656, -0.55841121, -0.57611241,\n", + " -0.62295082, -0.61078546, -0.70222743, -0.65847953, -0.69392523,\n", + " -0.63645815],\n", + " [-0.64081473, -0.64184953, -0.67038997, -0.63597179, -0.71350294,\n", + " -0.65844105, -0.66764304, -0.99412236, -0.99416864, -0.99434228,\n", + " -0.99426059, -0.99459663, -0.99472365, -0.99511795, -0.34328358,\n", + " -0.30094787, -0.36304817, -0.36259859, -0.63317757, -0.6323185 ,\n", + " -0.66042155, -0.62954279, -0.70926143, -0.65380117, -0.66588785,\n", + " -0.66002426],\n", + " [-0.73678026, -0.72139498, -0.72800314, -0.69239812, -0.71350294,\n", + " -0.68233451, -0.69732474, -0.99403811, -0.99408512, -0.9942623 ,\n", + " -0.99417111, -0.99451525, -0.99463206, -0.9950315 , -0.40671642,\n", + " -0.36018957, -0.44644141, -0.36242395, -0.72897196, -0.71194379,\n", + " -0.71896956, -0.68347011, -0.70926143, -0.6748538 , -0.69392523,\n", + " -0.71027552],\n", + " [-0.75401488, -0.74333856, -0.75112679, 
-0.71590909, -0.76555773,\n", + " -0.73599687, -0.75278266, -0.99395808, -0.99400684, -0.99419094,\n", + " -0.99409367, -0.99444556, -0.99455517, -0.99495418, -0.43656716,\n", + " -0.3957346 , -0.465133 , -0.36226933, -0.7453271 , -0.735363 ,\n", + " -0.74004684, -0.70926143, -0.76084408, -0.73099415, -0.75 ,\n", + " -0.7463178 ],\n", + " [-0.79866823, -0.76684953, -0.7558299 , -0.72688088, -0.76125245,\n", + " -0.75714845, -0.78363601, -0.99389098, -0.99393583, -0.99411958,\n", + " -0.99401538, -0.99437709, -0.99448423, -0.99489036, -0.43843284,\n", + " -0.37914692, -0.49388929, -0.36212623, -0.78971963, -0.75644028,\n", + " -0.7470726 , -0.72098476, -0.75615475, -0.7497076 , -0.78037383,\n", + " -0.76572518],\n", + " [-0.84919702, -0.83855799, -0.82245738, -0.78134796, -0.75225049,\n", + " -0.70661966, -0.65787932, -0.99384186, -0.99388279, -0.9940635 ,\n", + " -0.99395157, -0.9943113 , -0.99441264, -0.99481202, -0.51679104,\n", + " -0.50473934, -0.53414809, -0.36199904, -0.8411215 , -0.83138173,\n", + " -0.81264637, -0.77256741, -0.74677608, -0.70292398, -0.65654206,\n", + " -0.77438919],\n", + " [-0.69134352, -0.705721 , -0.73584166, -0.70297806, -0.75225049,\n", + " -0.72659616, -0.71724273, -0.99377229, -0.99381646, -0.99400032,\n", + " -0.99387925, -0.99423682, -0.99433003, -0.99471624, -0.43843284,\n", + " -0.40521327, -0.48094896, -0.36184615, -0.68457944, -0.69555035,\n", + " -0.72599532, -0.6975381 , -0.74677608, -0.71929825, -0.71261682,\n", + " -0.71893953],\n", + " [-0.84488837, -0.82915361, -0.83578287, -0.81896552, -0.86105675,\n", + " -0.8613396 , -0.86330795, -0.99369779, -0.99374656, -0.99393715,\n", + " -0.99381182, -0.99418494, -0.99427639, -0.99466379, -0.49253731,\n", + " -0.48104265, -0.51545651, -0.36172116, -0.8364486 , -0.81967213,\n", + " -0.82435597, -0.81008206, -0.85463072, -0.85497076, -0.86214953,\n", + " -0.84889967],\n", + " [-0.77908343, -0.78761755, -0.78757594, -0.78918495, -0.82348337,\n", + " -0.82491187, -0.85276313, -0.99365725, -0.99370625, -0.99389819,\n", + " -0.99377113, -0.99415254, -0.99424222, -0.99463329, -0.52798507,\n", + " -0.6042654 , -0.51545651, -0.36164779, -0.77336449, -0.77985948,\n", + " -0.78220141, -0.78429074, -0.86635404, -0.82222222, -0.85046729,\n", + " -0.81562987],\n", + " [-0.70544458, -0.64733542, -0.64844209, -0.61833856, -0.6481409 ,\n", + " -0.66392479, -0.71646163, -0.99356747, -0.99360832, -0.99380327,\n", + " -0.99367558, -0.99407272, -0.99415647, -0.99456035, -0.36567164,\n", + " -0.4549763 , -0.34291876, -0.36146698, -0.70560748, -0.63934426,\n", + " -0.63934426, -0.62016413, -0.64830012, -0.65847953, -0.72663551,\n", + " -0.66868827],\n", + " [-0.70387779, -0.67202194, -0.69508132, -0.72413793, -0.73228963,\n", + " -0.72816295, -0.72310096, -0.99348204, -0.99351955, -0.99372023,\n", + " -0.99359367, -0.99399256, -0.99407882, -0.99449203, -0.38432836,\n", + " -0.58530806, -0.33141625, -0.36130226, -0.69392523, -0.66042155,\n", + " -0.68384075, -0.71629543, -0.72801876, -0.72163743, -0.72196262,\n", + " -0.7113152 ],\n", + " [-0.8515472 , -0.81073668, -0.776602 , -0.76724138, -0.78277886,\n", + " -0.75832354, -0.74262839, -0.99341682, -0.99344607, -0.99364669,\n", + " -0.99352762, -0.99392743, -0.99401037, -0.99441763, -0.44029851,\n", + " -0.5521327 , -0.38461538, -0.36116102, -0.84345794, -0.80327869,\n", + " -0.76814988, -0.76084408, -0.77725674, -0.75204678, -0.73831776,\n", + " -0.7865188 ],\n", + " [-0.80258519, -0.83659875, -0.83499902, -0.79741379, -0.80821918,\n", + " -0.81629456, -0.79379028, 
-0.99336347, -0.99339091, -0.99358745,\n", + " -0.99346147, -0.9938642 , -0.99394733, -0.99434605, -0.44962687,\n", + " -0.6563981 , -0.34579439, -0.36103606, -0.79439252, -0.82669789,\n", + " -0.82669789, -0.78898007, -0.80304807, -0.81052632, -0.79205607,\n", + " -0.81632299],\n", + " [-0.83313749, -0.87539185, -0.90241035, -0.88440439, -0.86771037,\n", + " -0.87935762, -0.87580551, -0.99331764, -0.99335898, -0.99355602,\n", + " -0.99342259, -0.99382267, -0.99390959, -0.99430418, -0.54291045,\n", + " -0.72274882, -0.42918763, -0.36096002, -0.82943925, -0.87119438,\n", + " -0.89461358, -0.87573271, -0.86166471, -0.87134503, -0.87383178,\n", + " -0.88078323],\n", + " [-0.56678418, -0.60031348, -0.64295512, -0.78409091, -0.76164384,\n", + " -0.78535057, -0.82464362, -0.99321481, -0.99327557, -0.99349034,\n", + " -0.99337881, -0.9937915 , -0.99387347, -0.99427367, -0.32835821,\n", + " -0.47630332, -0.25808771, -0.36084678, -0.56074766, -0.59250585,\n", + " -0.6323185 , -0.77960141, -0.84759672, -0.78947368, -0.8364486 ,\n", + " -0.72621729],\n", + " [-0.77007442, -0.81230408, -0.83186361, -0.85540752, -0.85870841,\n", + " -0.86486486, -0.847686 , -0.99311634, -0.99319338, -0.99341516,\n", + " -0.99332651, -0.99374196, -0.99381551, -0.99422246, -0.46641791,\n", + " -0.65165877, -0.39324227, -0.36071245, -0.76168224, -0.80093677,\n", + " -0.82201405, -0.84759672, -0.85463072, -0.85730994, -0.84579439,\n", + " -0.83780974],\n", + " [-0.87622405, -0.92163009, -0.91377621, -0.89224138, -0.84540117,\n", + " -0.83431257, -0.82112869, -0.99306816, -0.99315821, -0.99338734,\n", + " -0.99329935, -0.99370611, -0.99377885, -0.9941789 , -0.55783582,\n", + " -0.65402844, -0.50970525, -0.36064058, -0.86682243, -0.91334895,\n", + " -0.90632319, -0.88745604, -0.84056272, -0.82923977, -0.81775701,\n", + " -0.87731762],\n", + " [-0.82843713, -0.83111285, -0.84166177, -0.8322884 , -0.84579256,\n", + " -0.8515472 , -0.86057411, -0.99302656, -0.99312426, -0.99335155,\n", + " -0.99325919, -0.99365991, -0.99373278, -0.99413129, -0.50559701,\n", + " -0.53791469, -0.52120776, -0.36055736, -0.82242991, -0.82201405,\n", + " -0.83138173, -0.82415006, -0.84056272, -0.84327485, -0.85747664,\n", + " -0.84508751],\n", + " [-0.74539757, -0.73824451, -0.76484421, -0.72100313, -0.73228963,\n", + " -0.70975323, -0.739504 , -0.99296569, -0.99306553, -0.99329699,\n", + " -0.9932005 , -0.99360224, -0.99367493, -0.99407862, -0.45149254,\n", + " -0.46208531, -0.48382459, -0.36044105, -0.73598131, -0.73067916,\n", + " -0.75644028, -0.71629543, -0.72801876, -0.70526316, -0.73831776,\n", + " -0.73696067],\n", + " [-0.40814728, -0.4596395 , -0.51087596, -0.46316614, -0.54598826,\n", + " -0.50607129, -0.57039641, -0.99283748, -0.99294147, -0.9931881 ,\n", + " -0.99308418, -0.99349681, -0.99356041, -0.99398047, -0.30597015,\n", + " -0.29383886, -0.34867002, -0.36020709, -0.46728972, -0.470726 ,\n", + " -0.5175644 , -0.48651817, -0.55685815, -0.51812865, -0.59579439,\n", + " -0.5179345 ],\n", + " [-0.47591069, -0.45219436, -0.48579267, -0.48981191, -0.57847358,\n", + " -0.54876616, -0.61882445, -0.99268659, -0.99280044, -0.99306033,\n", + " -0.99295359, -0.99338192, -0.99344287, -0.9938794 , -0.30223881,\n", + " -0.33649289, -0.32278936, -0.35994787, -0.49065421, -0.46370023,\n", + " -0.4941452 , -0.49589683, -0.58264947, -0.55321637, -0.62850467,\n", + " -0.53110379],\n", + " [-0.26792009, -0.27115987, -0.30080345, -0.24412226, -0.34246575,\n", + " -0.30434783, -0.40285101, -0.99250927, -0.99261854, -0.99288914,\n", + " -0.99278188, 
-0.99322495, -0.99327569, -0.9937324 , -0.22947761,\n", + " -0.28909953, -0.26096334, -0.35960139, -0.33878505, -0.29976581,\n", + " -0.32786885, -0.2919109 , -0.38100821, -0.32865497, -0.42523364,\n", + " -0.3394559 ],\n", + " [-0.31374853, -0.26449843, -0.2941407 , -0.23315047, -0.36516634,\n", + " -0.35957697, -0.44112478, -0.9923035 , -0.99241264, -0.99269787,\n", + " -0.99258055, -0.99304482, -0.99309553, -0.99356987, -0.2108209 ,\n", + " -0.21563981, -0.23652049, -0.35921021, -0.30607477, -0.26229508,\n", + " -0.29039813, -0.23563892, -0.35990621, -0.35204678, -0.43925234,\n", + " -0.32004852]])" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['X'][0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## keras.Sequential.DoubleLSTMTimeSeriesClassifier\n", + "\n", + "* Input: X, y\n", + "* Output: \n", + "* Effect: DoubleLSTM has been fitted." + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From /home/plamen/.virtualenvs/GreenGuard/lib/python3.6/site-packages/tensorflow_core/python/ops/resource_variable_ops.py:1630: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "If using Keras pass *_constraint arguments to layers.\n", + "WARNING:tensorflow:From /home/plamen/.virtualenvs/GreenGuard/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:422: The name tf.global_variables is deprecated. Please use tf.compat.v1.global_variables instead.\n", + "\n" + ] + } + ], + "source": [ + "step = 10\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorials/pipelines/unstack_lstm_timeseries_classifier.ipynb b/tutorials/pipelines/unstack_lstm_timeseries_classifier.ipynb new file mode 100644 index 0000000..faec108 --- /dev/null +++ b/tutorials/pipelines/unstack_lstm_timeseries_classifier.ipynb @@ -0,0 +1,2375 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# unstack_lstm_timeseries_classifier" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + } + ], + "source": [ + "from greenguard.demo import load_demo\n", + "\n", + "target_times, readings = load_demo()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_name = 'unstack_lstm_timeseries_classifier'" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from greenguard.pipeline import GreenGuardPipeline\n", + "\n", + "pipeline = GreenGuardPipeline(pipeline_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + 
"['pandas.DataFrame.resample',\n", + " 'pandas.DataFrame.unstack',\n", + " 'pandas.DataFrame.pop',\n", + " 'pandas.DataFrame.pop',\n", + " 'sklearn.impute.SimpleImputer',\n", + " 'sklearn.preprocessing.MinMaxScaler',\n", + " 'pandas.DataFrame',\n", + " 'pandas.DataFrame.set',\n", + " 'pandas.DataFrame.set',\n", + " 'mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences',\n", + " 'keras.Sequential.LSTMTimeSeriesClassifier']" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipeline.template['primitives']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Step by Step execution" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Input Data" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idsignal_idtimestampvalue
0T001S012013-01-10323.0
1T001S022013-01-10320.0
2T001S032013-01-10284.0
3T001S042013-01-10348.0
4T001S052013-01-10273.0
\n", + "
" + ], + "text/plain": [ + " turbine_id signal_id timestamp value\n", + "0 T001 S01 2013-01-10 323.0\n", + "1 T001 S02 2013-01-10 320.0\n", + "2 T001 S03 2013-01-10 284.0\n", + "3 T001 S04 2013-01-10 348.0\n", + "4 T001 S05 2013-01-10 273.0" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "readings.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idcutoff_timetarget
0T0012013-01-120
1T0012013-01-130
2T0012013-01-140
3T0012013-01-151
4T0012013-01-160
\n", + "
" + ], + "text/plain": [ + " turbine_id cutoff_time target\n", + "0 T001 2013-01-12 0\n", + "1 T001 2013-01-13 0\n", + "2 T001 2013-01-14 0\n", + "3 T001 2013-01-15 1\n", + "4 T001 2013-01-16 0" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "target_times.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data Preparation (part of GreenGuard Pipeline)\n", + "\n", + "* Input: target_times, readings, turbines\n", + "* Output: X, y, readings, turbines\n", + "* Effect: target_times has been split into X and y" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## pandas.DataFrame.resample\n", + "\n", + "* Input: readings\n", + "* Output: readings (resampled)\n", + "* Effect: readings have been resampled to the indicated resample rule and turbine_id,\n", + " signal_id and timestamp have been set as a multi-index" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "context = pipeline.fit(target_times, readings, output_=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'X', 'y'])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
value
turbine_idsignal_idtimestamp
T001S012013-01-10 00:00:00313.333333
2013-01-10 01:00:00197.500000
2013-01-10 02:00:00248.166667
2013-01-10 03:00:00253.166667
2013-01-10 04:00:00305.000000
\n", + "
" + ], + "text/plain": [ + " value\n", + "turbine_id signal_id timestamp \n", + "T001 S01 2013-01-10 00:00:00 313.333333\n", + " 2013-01-10 01:00:00 197.500000\n", + " 2013-01-10 02:00:00 248.166667\n", + " 2013-01-10 03:00:00 253.166667\n", + " 2013-01-10 04:00:00 305.000000" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## pandas.DataFrame.unstack\n", + "\n", + "* Input: readings (resampled)\n", + "* Output: readings (unstacked)\n", + "* Effect: readings have been unstacked" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "step = 1\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'X', 'y'])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idtimestampvalue_S01value_S02value_S03value_S04value_S05value_S06value_S07value_S08...value_S17value_S18value_S19value_S20value_S21value_S22value_S23value_S24value_S25value_S26
0T0012013-01-10 00:00:00313.333333323.833333336.000000364.666667286.500000314.000000243.1666673.197980e+06...10.3833333.131958e+0652.66666754.33333356.16666761.00000047.66666752.66666740.833333357.333333
1T0012013-01-10 01:00:00197.500000221.333333216.000000260.666667206.833333235.833333186.6666673.198221e+06...8.6666673.133668e+0633.16666737.00000036.16666743.66666734.50000039.33333331.166667249.666667
2T0012013-01-10 02:00:00248.166667271.666667277.500000298.000000233.666667271.166667216.3333333.198448e+06...8.8333333.135413e+0641.50000045.66666746.50000049.66666739.33333345.50000036.166667297.666667
3T0012013-01-10 03:00:00253.166667256.166667242.666667265.333333211.666667226.666667181.0000003.198691e+06...8.4333333.137001e+0642.33333342.83333340.50000044.16666735.33333337.83333330.333333268.000000
4T0012013-01-10 04:00:00305.000000312.333333346.166667329.833333280.666667308.833333271.8333333.198978e+06...9.0833333.138843e+0650.50000051.16666755.50000053.66666746.16666749.66666741.166667341.833333
\n", + "

5 rows × 28 columns

\n", + "
" + ], + "text/plain": [ + " turbine_id timestamp value_S01 value_S02 value_S03 \\\n", + "0 T001 2013-01-10 00:00:00 313.333333 323.833333 336.000000 \n", + "1 T001 2013-01-10 01:00:00 197.500000 221.333333 216.000000 \n", + "2 T001 2013-01-10 02:00:00 248.166667 271.666667 277.500000 \n", + "3 T001 2013-01-10 03:00:00 253.166667 256.166667 242.666667 \n", + "4 T001 2013-01-10 04:00:00 305.000000 312.333333 346.166667 \n", + "\n", + " value_S04 value_S05 value_S06 value_S07 value_S08 ... \\\n", + "0 364.666667 286.500000 314.000000 243.166667 3.197980e+06 ... \n", + "1 260.666667 206.833333 235.833333 186.666667 3.198221e+06 ... \n", + "2 298.000000 233.666667 271.166667 216.333333 3.198448e+06 ... \n", + "3 265.333333 211.666667 226.666667 181.000000 3.198691e+06 ... \n", + "4 329.833333 280.666667 308.833333 271.833333 3.198978e+06 ... \n", + "\n", + " value_S17 value_S18 value_S19 value_S20 value_S21 value_S22 \\\n", + "0 10.383333 3.131958e+06 52.666667 54.333333 56.166667 61.000000 \n", + "1 8.666667 3.133668e+06 33.166667 37.000000 36.166667 43.666667 \n", + "2 8.833333 3.135413e+06 41.500000 45.666667 46.500000 49.666667 \n", + "3 8.433333 3.137001e+06 42.333333 42.833333 40.500000 44.166667 \n", + "4 9.083333 3.138843e+06 50.500000 51.166667 55.500000 53.666667 \n", + "\n", + " value_S23 value_S24 value_S25 value_S26 \n", + "0 47.666667 52.666667 40.833333 357.333333 \n", + "1 34.500000 39.333333 31.166667 249.666667 \n", + "2 39.333333 45.500000 36.166667 297.666667 \n", + "3 35.333333 37.833333 30.333333 268.000000 \n", + "4 46.166667 49.666667 41.166667 341.833333 \n", + "\n", + "[5 rows x 28 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## pandas.DataFrame.pop\n", + "\n", + "* Input: readings (unstacked)\n", + "* Output: readings (without turbine_id), turbine_id\n", + "* Effect: turbine_id has been popped from readings" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "step = 2\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'X', 'y', 'turbine_id'])" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 T001\n", + "1 T001\n", + "2 T001\n", + "3 T001\n", + "4 T001\n", + "Name: turbine_id, dtype: object" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['turbine_id'].head()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timestampvalue_S01value_S02value_S03value_S04value_S05value_S06value_S07value_S08value_S09...value_S17value_S18value_S19value_S20value_S21value_S22value_S23value_S24value_S25value_S26
02013-01-10 00:00:00313.333333323.833333336.000000364.666667286.500000314.000000243.1666673.197980e+06695143.166667...10.3833333.131958e+0652.66666754.33333356.16666761.00000047.66666752.66666740.833333357.333333
12013-01-10 01:00:00197.500000221.333333216.000000260.666667206.833333235.833333186.6666673.198221e+06695403.666667...8.6666673.133668e+0633.16666737.00000036.16666743.66666734.50000039.33333331.166667249.666667
22013-01-10 02:00:00248.166667271.666667277.500000298.000000233.666667271.166667216.3333333.198448e+06695656.500000...8.8333333.135413e+0641.50000045.66666746.50000049.66666739.33333345.50000036.166667297.666667
32013-01-10 03:00:00253.166667256.166667242.666667265.333333211.666667226.666667181.0000003.198691e+06695911.333333...8.4333333.137001e+0642.33333342.83333340.50000044.16666735.33333337.83333330.333333268.000000
42013-01-10 04:00:00305.000000312.333333346.166667329.833333280.666667308.833333271.8333333.198978e+06696195.833333...9.0833333.138843e+0650.50000051.16666755.50000053.66666746.16666749.66666741.166667341.833333
\n", + "

5 rows × 27 columns

\n", + "
" + ], + "text/plain": [ + " timestamp value_S01 value_S02 value_S03 value_S04 \\\n", + "0 2013-01-10 00:00:00 313.333333 323.833333 336.000000 364.666667 \n", + "1 2013-01-10 01:00:00 197.500000 221.333333 216.000000 260.666667 \n", + "2 2013-01-10 02:00:00 248.166667 271.666667 277.500000 298.000000 \n", + "3 2013-01-10 03:00:00 253.166667 256.166667 242.666667 265.333333 \n", + "4 2013-01-10 04:00:00 305.000000 312.333333 346.166667 329.833333 \n", + "\n", + " value_S05 value_S06 value_S07 value_S08 value_S09 ... \\\n", + "0 286.500000 314.000000 243.166667 3.197980e+06 695143.166667 ... \n", + "1 206.833333 235.833333 186.666667 3.198221e+06 695403.666667 ... \n", + "2 233.666667 271.166667 216.333333 3.198448e+06 695656.500000 ... \n", + "3 211.666667 226.666667 181.000000 3.198691e+06 695911.333333 ... \n", + "4 280.666667 308.833333 271.833333 3.198978e+06 696195.833333 ... \n", + "\n", + " value_S17 value_S18 value_S19 value_S20 value_S21 value_S22 \\\n", + "0 10.383333 3.131958e+06 52.666667 54.333333 56.166667 61.000000 \n", + "1 8.666667 3.133668e+06 33.166667 37.000000 36.166667 43.666667 \n", + "2 8.833333 3.135413e+06 41.500000 45.666667 46.500000 49.666667 \n", + "3 8.433333 3.137001e+06 42.333333 42.833333 40.500000 44.166667 \n", + "4 9.083333 3.138843e+06 50.500000 51.166667 55.500000 53.666667 \n", + "\n", + " value_S23 value_S24 value_S25 value_S26 \n", + "0 47.666667 52.666667 40.833333 357.333333 \n", + "1 34.500000 39.333333 31.166667 249.666667 \n", + "2 39.333333 45.500000 36.166667 297.666667 \n", + "3 35.333333 37.833333 30.333333 268.000000 \n", + "4 46.166667 49.666667 41.166667 341.833333 \n", + "\n", + "[5 rows x 27 columns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## pandas.DataFrame.pop\n", + "\n", + "* Input: readings (without turbine_id)\n", + "* Output: readings (without timestamp), timestamp\n", + "* Effect: timestamp has been popped from readings" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "step = 3\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'X', 'y', 'timestamp'])" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2013-01-10 00:00:00\n", + "1 2013-01-10 01:00:00\n", + "2 2013-01-10 02:00:00\n", + "3 2013-01-10 03:00:00\n", + "4 2013-01-10 04:00:00\n", + "Name: timestamp, dtype: datetime64[ns]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['timestamp'].head()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
value_S01value_S02value_S03value_S04value_S05value_S06value_S07value_S08value_S09value_S10...value_S17value_S18value_S19value_S20value_S21value_S22value_S23value_S24value_S25value_S26
0313.333333323.833333336.000000364.666667286.500000314.000000243.1666673.197980e+06695143.1666673.348384e+06...10.3833333.131958e+0652.66666754.33333356.16666761.00000047.66666752.66666740.833333357.333333
1197.500000221.333333216.000000260.666667206.833333235.833333186.6666673.198221e+06695403.6666673.348651e+06...8.6666673.133668e+0633.16666737.00000036.16666743.66666734.50000039.33333331.166667249.666667
2248.166667271.666667277.500000298.000000233.666667271.166667216.3333333.198448e+06695656.5000003.348910e+06...8.8333333.135413e+0641.50000045.66666746.50000049.66666739.33333345.50000036.166667297.666667
3253.166667256.166667242.666667265.333333211.666667226.666667181.0000003.198691e+06695911.3333333.349157e+06...8.4333333.137001e+0642.33333342.83333340.50000044.16666735.33333337.83333330.333333268.000000
4305.000000312.333333346.166667329.833333280.666667308.833333271.8333333.198978e+06696195.8333333.349452e+06...9.0833333.138843e+0650.50000051.16666755.50000053.66666746.16666749.66666741.166667341.833333
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " value_S01 value_S02 value_S03 value_S04 value_S05 value_S06 \\\n", + "0 313.333333 323.833333 336.000000 364.666667 286.500000 314.000000 \n", + "1 197.500000 221.333333 216.000000 260.666667 206.833333 235.833333 \n", + "2 248.166667 271.666667 277.500000 298.000000 233.666667 271.166667 \n", + "3 253.166667 256.166667 242.666667 265.333333 211.666667 226.666667 \n", + "4 305.000000 312.333333 346.166667 329.833333 280.666667 308.833333 \n", + "\n", + " value_S07 value_S08 value_S09 value_S10 ... value_S17 \\\n", + "0 243.166667 3.197980e+06 695143.166667 3.348384e+06 ... 10.383333 \n", + "1 186.666667 3.198221e+06 695403.666667 3.348651e+06 ... 8.666667 \n", + "2 216.333333 3.198448e+06 695656.500000 3.348910e+06 ... 8.833333 \n", + "3 181.000000 3.198691e+06 695911.333333 3.349157e+06 ... 8.433333 \n", + "4 271.833333 3.198978e+06 696195.833333 3.349452e+06 ... 9.083333 \n", + "\n", + " value_S18 value_S19 value_S20 value_S21 value_S22 value_S23 \\\n", + "0 3.131958e+06 52.666667 54.333333 56.166667 61.000000 47.666667 \n", + "1 3.133668e+06 33.166667 37.000000 36.166667 43.666667 34.500000 \n", + "2 3.135413e+06 41.500000 45.666667 46.500000 49.666667 39.333333 \n", + "3 3.137001e+06 42.333333 42.833333 40.500000 44.166667 35.333333 \n", + "4 3.138843e+06 50.500000 51.166667 55.500000 53.666667 46.166667 \n", + "\n", + " value_S24 value_S25 value_S26 \n", + "0 52.666667 40.833333 357.333333 \n", + "1 39.333333 31.166667 249.666667 \n", + "2 45.500000 36.166667 297.666667 \n", + "3 37.833333 30.333333 268.000000 \n", + "4 49.666667 41.166667 341.833333 \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## sklearn.impute.SimpleImputer\n", + "\n", + "* Input: readings (unstacked, no turbine_id, no timestamp)\n", + "* Output: readings (imputed, numpy array)\n", + "* Effect: readings have been imputed and converted to numpy array" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "step = 4\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[3.13333333e+02, 3.23833333e+02, 3.36000000e+02, 3.64666667e+02,\n", + " 2.86500000e+02, 3.14000000e+02, 2.43166667e+02, 3.19798000e+06,\n", + " 6.95143167e+05, 3.34838383e+06, 3.43692150e+06, 3.32248667e+06,\n", + " 3.35809000e+06, 3.22390150e+06, 7.95000000e+00, 5.85000000e+00,\n", + " 1.03833333e+01, 3.13195833e+06, 5.26666667e+01, 5.43333333e+01,\n", + " 5.61666667e+01, 6.10000000e+01, 4.76666667e+01, 5.26666667e+01,\n", + " 4.08333333e+01, 3.57333333e+02],\n", + " [1.97500000e+02, 2.21333333e+02, 2.16000000e+02, 2.60666667e+02,\n", + " 2.06833333e+02, 2.35833333e+02, 1.86666667e+02, 3.19822067e+06,\n", + " 6.95403667e+05, 3.34865117e+06, 3.43722283e+06, 3.32272200e+06,\n", + " 3.35834000e+06, 3.22409567e+06, 6.83333333e+00, 5.15000000e+00,\n", + " 8.66666667e+00, 3.13366817e+06, 
3.31666667e+01, 3.70000000e+01,\n", + " 3.61666667e+01, 4.36666667e+01, 3.45000000e+01, 3.93333333e+01,\n", + " 3.11666667e+01, 2.49666667e+02],\n", + " [2.48166667e+02, 2.71666667e+02, 2.77500000e+02, 2.98000000e+02,\n", + " 2.33666667e+02, 2.71166667e+02, 2.16333333e+02, 3.19844767e+06,\n", + " 6.95656500e+05, 3.34890967e+06, 3.43751900e+06, 3.32295950e+06,\n", + " 3.35862067e+06, 3.22432333e+06, 7.11666667e+00, 5.56666667e+00,\n", + " 8.83333333e+00, 3.13541283e+06, 4.15000000e+01, 4.56666667e+01,\n", + " 4.65000000e+01, 4.96666667e+01, 3.93333333e+01, 4.55000000e+01,\n", + " 3.61666667e+01, 2.97666667e+02],\n", + " [2.53166667e+02, 2.56166667e+02, 2.42666667e+02, 2.65333333e+02,\n", + " 2.11666667e+02, 2.26666667e+02, 1.81000000e+02, 3.19869117e+06,\n", + " 6.95911333e+05, 3.34915717e+06, 3.43778050e+06, 3.32316850e+06,\n", + " 3.35884883e+06, 3.22450217e+06, 6.71666667e+00, 5.16666667e+00,\n", + " 8.43333333e+00, 3.13700133e+06, 4.23333333e+01, 4.28333333e+01,\n", + " 4.05000000e+01, 4.41666667e+01, 3.53333333e+01, 3.78333333e+01,\n", + " 3.03333333e+01, 2.68000000e+02],\n", + " [3.05000000e+02, 3.12333333e+02, 3.46166667e+02, 3.29833333e+02,\n", + " 2.80666667e+02, 3.08833333e+02, 2.71833333e+02, 3.19897850e+06,\n", + " 6.96195833e+05, 3.34945200e+06, 3.43807767e+06, 3.32340933e+06,\n", + " 3.35910983e+06, 3.22471400e+06, 7.20000000e+00, 5.28333333e+00,\n", + " 9.08333333e+00, 3.13884333e+06, 5.05000000e+01, 5.11666667e+01,\n", + " 5.55000000e+01, 5.36666667e+01, 4.61666667e+01, 4.96666667e+01,\n", + " 4.11666667e+01, 3.41833333e+02]])" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'][0:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## sklearn.preprocessing.MinMaxScaler\n", + "\n", + "* Input: (imputed, array)\n", + "* Output: readings (scaled, array)\n", + "* Effect: readings have been scaled to [-1, 1] range" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "step = 5\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[-0.26126126, -0.23706897, -0.20870076, -0.14106583, -0.32328767,\n", + " -0.25969448, -0.42198789, -1. , -1. , -1. ,\n", + " -1. , -1. , -1. , -1. 
, -0.11007463,\n", + " -0.16824645, -0.10424155, -0.37397741, -0.25233645, -0.22716628,\n", + " -0.20140515, -0.13481829, -0.32239156, -0.25380117, -0.4182243 ,\n", + " -0.25697453],\n", + " [-0.53349001, -0.47805643, -0.49088771, -0.38557994, -0.51037182,\n", + " -0.44339992, -0.55438391, -0.99983031, -0.99982547, -0.99982499,\n", + " -0.99980741, -0.9998428 , -0.99983779, -0.99986887, -0.23507463,\n", + " -0.26777251, -0.25233645, -0.37363511, -0.52570093, -0.470726 ,\n", + " -0.4824356 , -0.37866354, -0.50762016, -0.44093567, -0.55373832,\n", + " -0.48085254],\n", + " [-0.41441441, -0.35971787, -0.3462669 , -0.29780564, -0.44735812,\n", + " -0.36036036, -0.48486624, -0.99967026, -0.99965608, -0.99965576,\n", + " -0.99961813, -0.99968416, -0.99965569, -0.99971512, -0.20335821,\n", + " -0.20853081, -0.2379583 , -0.37328583, -0.4088785 , -0.34894614,\n", + " -0.33723653, -0.29425557, -0.43962485, -0.35438596, -0.48364486,\n", + " -0.38104315],\n", + " [-0.40266353, -0.39615987, -0.4281795 , -0.37460815, -0.49902153,\n", + " -0.4649432 , -0.56766257, -0.99949857, -0.99948535, -0.99949373,\n", + " -0.999451 , -0.99954455, -0.99950765, -0.99959435, -0.24813433,\n", + " -0.26540284, -0.27246585, -0.37296782, -0.39719626, -0.38875878,\n", + " -0.42154567, -0.37162954, -0.49589683, -0.4619883 , -0.56542056,\n", + " -0.4427309 ],\n", + " [-0.28084606, -0.26410658, -0.18479326, -0.22296238, -0.3369863 ,\n", + " -0.27183705, -0.35481351, -0.99929598, -0.99929474, -0.99930071,\n", + " -0.99926107, -0.99938368, -0.99933831, -0.9994513 , -0.19402985,\n", + " -0.24881517, -0.21639109, -0.37259906, -0.28271028, -0.27166276,\n", + " -0.21077283, -0.23798359, -0.34349355, -0.29590643, -0.4135514 ,\n", + " -0.28920464]])" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'][0:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## pandas.DataFrame\n", + "\n", + "* Input: readings (scaled, array)\n", + "* Output: readings (dataframe)\n", + "* Effect: readings have been converted into a dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "step = 6\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...16171819202122232425
0-0.261261-0.237069-0.208701-0.141066-0.323288-0.259694-0.421988-1.000000-1.000000-1.000000...-0.104242-0.373977-0.252336-0.227166-0.201405-0.134818-0.322392-0.253801-0.418224-0.256975
1-0.533490-0.478056-0.490888-0.385580-0.510372-0.443400-0.554384-0.999830-0.999825-0.999825...-0.252336-0.373635-0.525701-0.470726-0.482436-0.378664-0.507620-0.440936-0.553738-0.480853
2-0.414414-0.359718-0.346267-0.297806-0.447358-0.360360-0.484866-0.999670-0.999656-0.999656...-0.237958-0.373286-0.408879-0.348946-0.337237-0.294256-0.439625-0.354386-0.483645-0.381043
3-0.402664-0.396160-0.428180-0.374608-0.499022-0.464943-0.567663-0.999499-0.999485-0.999494...-0.272466-0.372968-0.397196-0.388759-0.421546-0.371630-0.495897-0.461988-0.565421-0.442731
4-0.280846-0.264107-0.184793-0.222962-0.336986-0.271837-0.354814-0.999296-0.999295-0.999301...-0.216391-0.372599-0.282710-0.271663-0.210773-0.237984-0.343494-0.295906-0.413551-0.289205
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 \\\n", + "0 -0.261261 -0.237069 -0.208701 -0.141066 -0.323288 -0.259694 -0.421988 \n", + "1 -0.533490 -0.478056 -0.490888 -0.385580 -0.510372 -0.443400 -0.554384 \n", + "2 -0.414414 -0.359718 -0.346267 -0.297806 -0.447358 -0.360360 -0.484866 \n", + "3 -0.402664 -0.396160 -0.428180 -0.374608 -0.499022 -0.464943 -0.567663 \n", + "4 -0.280846 -0.264107 -0.184793 -0.222962 -0.336986 -0.271837 -0.354814 \n", + "\n", + " 7 8 9 ... 16 17 18 19 \\\n", + "0 -1.000000 -1.000000 -1.000000 ... -0.104242 -0.373977 -0.252336 -0.227166 \n", + "1 -0.999830 -0.999825 -0.999825 ... -0.252336 -0.373635 -0.525701 -0.470726 \n", + "2 -0.999670 -0.999656 -0.999656 ... -0.237958 -0.373286 -0.408879 -0.348946 \n", + "3 -0.999499 -0.999485 -0.999494 ... -0.272466 -0.372968 -0.397196 -0.388759 \n", + "4 -0.999296 -0.999295 -0.999301 ... -0.216391 -0.372599 -0.282710 -0.271663 \n", + "\n", + " 20 21 22 23 24 25 \n", + "0 -0.201405 -0.134818 -0.322392 -0.253801 -0.418224 -0.256975 \n", + "1 -0.482436 -0.378664 -0.507620 -0.440936 -0.553738 -0.480853 \n", + "2 -0.337237 -0.294256 -0.439625 -0.354386 -0.483645 -0.381043 \n", + "3 -0.421546 -0.371630 -0.495897 -0.461988 -0.565421 -0.442731 \n", + "4 -0.210773 -0.237984 -0.343494 -0.295906 -0.413551 -0.289205 \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## pandas.DataFrame.set\n", + "\n", + "* Input: readings (dataframe)\n", + "* Output: readings (dataframe with turbine_id)\n", + "* Effect: turbine_id has been set as a readings column" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "step = 7\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...171819202122232425turbine_id
0-0.261261-0.237069-0.208701-0.141066-0.323288-0.259694-0.421988-1.000000-1.000000-1.000000...-0.373977-0.252336-0.227166-0.201405-0.134818-0.322392-0.253801-0.418224-0.256975T001
1-0.533490-0.478056-0.490888-0.385580-0.510372-0.443400-0.554384-0.999830-0.999825-0.999825...-0.373635-0.525701-0.470726-0.482436-0.378664-0.507620-0.440936-0.553738-0.480853T001
2-0.414414-0.359718-0.346267-0.297806-0.447358-0.360360-0.484866-0.999670-0.999656-0.999656...-0.373286-0.408879-0.348946-0.337237-0.294256-0.439625-0.354386-0.483645-0.381043T001
3-0.402664-0.396160-0.428180-0.374608-0.499022-0.464943-0.567663-0.999499-0.999485-0.999494...-0.372968-0.397196-0.388759-0.421546-0.371630-0.495897-0.461988-0.565421-0.442731T001
4-0.280846-0.264107-0.184793-0.222962-0.336986-0.271837-0.354814-0.999296-0.999295-0.999301...-0.372599-0.282710-0.271663-0.210773-0.237984-0.343494-0.295906-0.413551-0.289205T001
\n", + "

5 rows × 27 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 \\\n", + "0 -0.261261 -0.237069 -0.208701 -0.141066 -0.323288 -0.259694 -0.421988 \n", + "1 -0.533490 -0.478056 -0.490888 -0.385580 -0.510372 -0.443400 -0.554384 \n", + "2 -0.414414 -0.359718 -0.346267 -0.297806 -0.447358 -0.360360 -0.484866 \n", + "3 -0.402664 -0.396160 -0.428180 -0.374608 -0.499022 -0.464943 -0.567663 \n", + "4 -0.280846 -0.264107 -0.184793 -0.222962 -0.336986 -0.271837 -0.354814 \n", + "\n", + " 7 8 9 ... 17 18 19 20 \\\n", + "0 -1.000000 -1.000000 -1.000000 ... -0.373977 -0.252336 -0.227166 -0.201405 \n", + "1 -0.999830 -0.999825 -0.999825 ... -0.373635 -0.525701 -0.470726 -0.482436 \n", + "2 -0.999670 -0.999656 -0.999656 ... -0.373286 -0.408879 -0.348946 -0.337237 \n", + "3 -0.999499 -0.999485 -0.999494 ... -0.372968 -0.397196 -0.388759 -0.421546 \n", + "4 -0.999296 -0.999295 -0.999301 ... -0.372599 -0.282710 -0.271663 -0.210773 \n", + "\n", + " 21 22 23 24 25 turbine_id \n", + "0 -0.134818 -0.322392 -0.253801 -0.418224 -0.256975 T001 \n", + "1 -0.378664 -0.507620 -0.440936 -0.553738 -0.480853 T001 \n", + "2 -0.294256 -0.439625 -0.354386 -0.483645 -0.381043 T001 \n", + "3 -0.371630 -0.495897 -0.461988 -0.565421 -0.442731 T001 \n", + "4 -0.237984 -0.343494 -0.295906 -0.413551 -0.289205 T001 \n", + "\n", + "[5 rows x 27 columns]" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## pandas.DataFrame.set\n", + "\n", + "* Input: readings (dataframe with turbine_id)\n", + "* Output: readings (dataframe with turbine_id and timestamp)\n", + "* Effect: timestamp has been set as a readings column" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "step = 8\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...1819202122232425turbine_idtimestamp
0-0.261261-0.237069-0.208701-0.141066-0.323288-0.259694-0.421988-1.000000-1.000000-1.000000...-0.252336-0.227166-0.201405-0.134818-0.322392-0.253801-0.418224-0.256975T0012013-01-10 00:00:00
1-0.533490-0.478056-0.490888-0.385580-0.510372-0.443400-0.554384-0.999830-0.999825-0.999825...-0.525701-0.470726-0.482436-0.378664-0.507620-0.440936-0.553738-0.480853T0012013-01-10 01:00:00
2-0.414414-0.359718-0.346267-0.297806-0.447358-0.360360-0.484866-0.999670-0.999656-0.999656...-0.408879-0.348946-0.337237-0.294256-0.439625-0.354386-0.483645-0.381043T0012013-01-10 02:00:00
3-0.402664-0.396160-0.428180-0.374608-0.499022-0.464943-0.567663-0.999499-0.999485-0.999494...-0.397196-0.388759-0.421546-0.371630-0.495897-0.461988-0.565421-0.442731T0012013-01-10 03:00:00
4-0.280846-0.264107-0.184793-0.222962-0.336986-0.271837-0.354814-0.999296-0.999295-0.999301...-0.282710-0.271663-0.210773-0.237984-0.343494-0.295906-0.413551-0.289205T0012013-01-10 04:00:00
\n", + "

5 rows × 28 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 \\\n", + "0 -0.261261 -0.237069 -0.208701 -0.141066 -0.323288 -0.259694 -0.421988 \n", + "1 -0.533490 -0.478056 -0.490888 -0.385580 -0.510372 -0.443400 -0.554384 \n", + "2 -0.414414 -0.359718 -0.346267 -0.297806 -0.447358 -0.360360 -0.484866 \n", + "3 -0.402664 -0.396160 -0.428180 -0.374608 -0.499022 -0.464943 -0.567663 \n", + "4 -0.280846 -0.264107 -0.184793 -0.222962 -0.336986 -0.271837 -0.354814 \n", + "\n", + " 7 8 9 ... 18 19 20 21 \\\n", + "0 -1.000000 -1.000000 -1.000000 ... -0.252336 -0.227166 -0.201405 -0.134818 \n", + "1 -0.999830 -0.999825 -0.999825 ... -0.525701 -0.470726 -0.482436 -0.378664 \n", + "2 -0.999670 -0.999656 -0.999656 ... -0.408879 -0.348946 -0.337237 -0.294256 \n", + "3 -0.999499 -0.999485 -0.999494 ... -0.397196 -0.388759 -0.421546 -0.371630 \n", + "4 -0.999296 -0.999295 -0.999301 ... -0.282710 -0.271663 -0.210773 -0.237984 \n", + "\n", + " 22 23 24 25 turbine_id timestamp \n", + "0 -0.322392 -0.253801 -0.418224 -0.256975 T001 2013-01-10 00:00:00 \n", + "1 -0.507620 -0.440936 -0.553738 -0.480853 T001 2013-01-10 01:00:00 \n", + "2 -0.439625 -0.354386 -0.483645 -0.381043 T001 2013-01-10 02:00:00 \n", + "3 -0.495897 -0.461988 -0.565421 -0.442731 T001 2013-01-10 03:00:00 \n", + "4 -0.343494 -0.295906 -0.413551 -0.289205 T001 2013-01-10 04:00:00 \n", + "\n", + "[5 rows x 28 columns]" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences\n", + "\n", + "* Input: X, readings (dataframe with turbine_id and timestamp)\n", + "* Output: X\n", + "* Effect: X has been converted to a 3d numpy array that contains 1 matrix of shape\n", + " (window_size x num_signals) for each one of the target times." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'window_size': 24, 'cutoff_time': 'cutoff_time', 'time_index': 'timestamp'}" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipeline._pipeline.get_hyperparameters()[\n", + " 'mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1']" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "step = 9\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(8521, 28)" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(353,)" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['y'].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(353, 24, 26)" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['X'].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[-0.58793576, -0.60305643, -0.63981971, -0.61481191, -0.69823875,\n", + " -0.65021543, -0.68912322, -0.99436914, -0.99439755, -0.99454249,\n", + " -0.99446788, -0.99476185, -0.99490997, -0.99529511, -0.34701493,\n", + " -0.33886256, -0.33860532, -0.36301186, -0.57943925, -0.59250585,\n", + " -0.6323185 , -0.60609613, -0.69284877, -0.64444444, -0.68691589,\n", + " -0.63853752],\n", + " [-0.56600078, -0.5846395 , -0.63002156, -0.61559561, -0.70880626,\n", + " -0.66392479, -0.69732474, -0.9942427 , -0.99427986, -0.9944408 ,\n", + " -0.99436498, -0.99468147, -0.99482011, -0.99521249, -0.33955224,\n", + " -0.31516588, -0.38892883, -0.36280656, -0.55841121, -0.57611241,\n", + " -0.62295082, -0.61078546, -0.70222743, -0.65847953, -0.69392523,\n", + " -0.63645815],\n", + " [-0.64081473, -0.64184953, -0.67038997, -0.63597179, -0.71350294,\n", + " -0.65844105, -0.66764304, -0.99412236, -0.99416864, -0.99434228,\n", + " -0.99426059, -0.99459663, -0.99472365, -0.99511795, -0.34328358,\n", + " -0.30094787, -0.36304817, -0.36259859, -0.63317757, -0.6323185 ,\n", + " -0.66042155, -0.62954279, -0.70926143, -0.65380117, -0.66588785,\n", + " -0.66002426]])" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['X'][0][:3]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## keras.Sequential.LSTMTimeSeriesClassifier\n", + "\n", + "* Input: X, y\n", + "* Output: \n", + "* Effect: LSTM has been fitted." 
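A conceptual sketch of the windowing step can make the (353, 24, 26) shape above easier to read. This is not the primitive's actual implementation; `target_times` here stands for the (turbine_id, cutoff_time) pairs the pipeline carries in X, and the real primitive also handles targets whose window is incomplete:

```python
import numpy as np

def window_before_cutoff(readings, cutoff_time, window_size=24):
    """Last `window_size` rows of signal values strictly before the cutoff."""
    before = readings[readings['timestamp'] < cutoff_time]
    window = before.tail(window_size)
    return window.drop(columns=['turbine_id', 'timestamp']).values

# Conceptually, X stacks one (window_size x num_signals) matrix per target
# time, which is why the result has shape
# (num_targets, window_size, num_signals) = (353, 24, 26).
X = np.stack([
    window_before_cutoff(readings[readings['turbine_id'] == row.turbine_id],
                         row.cutoff_time)
    for row in target_times.itertuples()
])
```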
+ ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "WARNING:tensorflow:From /home/plamen/.virtualenvs/GreenGuard/lib/python3.6/site-packages/tensorflow_core/python/ops/resource_variable_ops.py:1630: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.\n", + "Instructions for updating:\n", + "If using Keras pass *_constraint arguments to layers.\n", + "WARNING:tensorflow:From /home/plamen/.virtualenvs/GreenGuard/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:422: The name tf.global_variables is deprecated. Please use tf.compat.v1.global_variables instead.\n", + "\n" + ] + } + ], + "source": [ + "step = 10\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorials/pipelines/unstack_normalize_dfs_xgb_classifier.ipynb b/tutorials/pipelines/unstack_normalize_dfs_xgb_classifier.ipynb new file mode 100644 index 0000000..6af0092 --- /dev/null +++ b/tutorials/pipelines/unstack_normalize_dfs_xgb_classifier.ipynb @@ -0,0 +1,1785 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# unstack_normalize_dfs_xgb_classifier" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using TensorFlow backend.\n" + ] + } + ], + "source": [ + "from greenguard.demo import load_demo\n", + "\n", + "target_times, readings = load_demo()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_name = 'unstack_normalize_dfs_xgb_classifier'" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from greenguard.pipeline import GreenGuardPipeline\n", + "\n", + "pipeline = GreenGuardPipeline(pipeline_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['pandas.DataFrame.resample',\n", + " 'pandas.DataFrame.unstack',\n", + " 'featuretools.EntitySet.entity_from_dataframe',\n", + " 'featuretools.EntitySet.normalize_entity',\n", + " 'featuretools.dfs',\n", + " 'mlprimitives.custom.feature_extraction.CategoricalEncoder',\n", + " 'xgboost.XGBClassifier']" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipeline.template['primitives']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Step by Step execution" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Input Data" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idsignal_idtimestampvalue
0T001S012013-01-10323.0
1T001S022013-01-10320.0
2T001S032013-01-10284.0
3T001S042013-01-10348.0
4T001S052013-01-10273.0
\n", + "
" + ], + "text/plain": [ + " turbine_id signal_id timestamp value\n", + "0 T001 S01 2013-01-10 323.0\n", + "1 T001 S02 2013-01-10 320.0\n", + "2 T001 S03 2013-01-10 284.0\n", + "3 T001 S04 2013-01-10 348.0\n", + "4 T001 S05 2013-01-10 273.0" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "readings.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idcutoff_timetarget
0T0012013-01-120
1T0012013-01-130
2T0012013-01-140
3T0012013-01-151
4T0012013-01-160
\n", + "
" + ], + "text/plain": [ + " turbine_id cutoff_time target\n", + "0 T001 2013-01-12 0\n", + "1 T001 2013-01-13 0\n", + "2 T001 2013-01-14 0\n", + "3 T001 2013-01-15 1\n", + "4 T001 2013-01-16 0" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "target_times.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data Preparation (part of GreenGuard Pipeline)\n", + "\n", + "* Input: target_times, readings, turbines\n", + "* Output: X, y, readings, turbines\n", + "* Effect: target_times has been split into X and y" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## pandas.DataFrame.resample\n", + "\n", + "* Input: readings\n", + "* Output: readings (resampled)\n", + "* Effect: readings have been resampled to the indicated resample rule and turbine_id,\n", + " signal_id and timestamp have been set as a multi-index" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "step = 0\n", + "context = pipeline.fit(target_times, readings, output_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'X', 'y'])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
value
turbine_idsignal_idtimestamp
T001S012013-01-10 00:00:00323.0
2013-01-10 00:10:00346.0
2013-01-10 00:20:00407.0
2013-01-10 00:30:00257.0
2013-01-10 00:40:00267.0
\n", + "
" + ], + "text/plain": [ + " value\n", + "turbine_id signal_id timestamp \n", + "T001 S01 2013-01-10 00:00:00 323.0\n", + " 2013-01-10 00:10:00 346.0\n", + " 2013-01-10 00:20:00 407.0\n", + " 2013-01-10 00:30:00 257.0\n", + " 2013-01-10 00:40:00 267.0" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## pandas.DataFrame.unstack\n", + "\n", + "* Input: readings (resampled)\n", + "* Output: readings (unstacked)\n", + "* Effect: readings have been unstacked" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "step = 1\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'X', 'y'])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idtimestampvalue_S01value_S02value_S03value_S04value_S05value_S06value_S07value_S08...value_S17value_S18value_S19value_S20value_S21value_S22value_S23value_S24value_S25value_S26
0T0012013-01-10 00:00:00323.0320.0284.0348.0273.0342.0280.03197842.0...11.73131020.055.055.047.058.045.058.047.0356.0
1T0012013-01-10 00:10:00346.0384.0367.0411.0331.0360.0249.03197900.0...10.23131420.058.063.062.067.055.061.042.0400.0
2T0012013-01-10 00:20:00407.0363.0407.0393.0275.0335.0270.03197968.0...9.53131822.068.061.067.066.046.055.045.0402.0
3T0012013-01-10 00:30:00257.0307.0315.0361.0317.0354.0271.03198011.0...10.53132179.043.051.053.062.053.060.045.0357.0
4T0012013-01-10 00:40:00267.0309.0314.0355.0262.0246.0212.03198056.0...9.63132501.045.051.054.059.043.041.036.0322.0
\n", + "

5 rows × 28 columns

\n", + "
" + ], + "text/plain": [ + " turbine_id timestamp value_S01 value_S02 value_S03 value_S04 \\\n", + "0 T001 2013-01-10 00:00:00 323.0 320.0 284.0 348.0 \n", + "1 T001 2013-01-10 00:10:00 346.0 384.0 367.0 411.0 \n", + "2 T001 2013-01-10 00:20:00 407.0 363.0 407.0 393.0 \n", + "3 T001 2013-01-10 00:30:00 257.0 307.0 315.0 361.0 \n", + "4 T001 2013-01-10 00:40:00 267.0 309.0 314.0 355.0 \n", + "\n", + " value_S05 value_S06 value_S07 value_S08 ... value_S17 value_S18 \\\n", + "0 273.0 342.0 280.0 3197842.0 ... 11.7 3131020.0 \n", + "1 331.0 360.0 249.0 3197900.0 ... 10.2 3131420.0 \n", + "2 275.0 335.0 270.0 3197968.0 ... 9.5 3131822.0 \n", + "3 317.0 354.0 271.0 3198011.0 ... 10.5 3132179.0 \n", + "4 262.0 246.0 212.0 3198056.0 ... 9.6 3132501.0 \n", + "\n", + " value_S19 value_S20 value_S21 value_S22 value_S23 value_S24 \\\n", + "0 55.0 55.0 47.0 58.0 45.0 58.0 \n", + "1 58.0 63.0 62.0 67.0 55.0 61.0 \n", + "2 68.0 61.0 67.0 66.0 46.0 55.0 \n", + "3 43.0 51.0 53.0 62.0 53.0 60.0 \n", + "4 45.0 51.0 54.0 59.0 43.0 41.0 \n", + "\n", + " value_S25 value_S26 \n", + "0 47.0 356.0 \n", + "1 42.0 400.0 \n", + "2 45.0 402.0 \n", + "3 45.0 357.0 \n", + "4 36.0 322.0 \n", + "\n", + "[5 rows x 28 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## featuretools.EntitySet.entity_from_dataframe\n", + "\n", + "* Input: readings (resampled)\n", + "* Output: entityset\n", + "* Effect: Entityset has been generated from readings" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "step = 2\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'X', 'y', 'entityset'])" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Entityset: entityset\n", + " Entities:\n", + " readings [Rows: 51121, Columns: 29]\n", + " Relationships:\n", + " No relationships" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['entityset']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## featuretools.EntitySet.normalize_entity\n", + "\n", + "* Input: entityset\n", + "* Output: entityset with relationship (readings.turbine_id with turbines.turbine_id)\n", + "* Effect: establish relation between readings and turbines" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "step = 3\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'entityset', 'X', 'y'])" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Entityset: entityset\n", + " Entities:\n", + " readings [Rows: 51121, Columns: 29]\n", 
+ " turbines [Rows: 1, Columns: 1]\n", + " Relationships:\n", + " readings.turbine_id -> turbines.turbine_id" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['entityset']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## featuretools.dfs\n", + "\n", + "* Input: entityset (unstacked, no turbine_id, no timestamp)\n", + "* Output: X (has additional features)\n", + "* Effect: build features for relational dataset using DFS" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "step = 4\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'entityset', 'X', 'y'])" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys() " + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SUM(readings.value_S09)SUM(readings.value_S01)SUM(readings.value_S12)SUM(readings.value_S10)SUM(readings.value_S18)SUM(readings.value_S03)SUM(readings.value_S16)SUM(readings.value_S11)SUM(readings.value_S21)SUM(readings.value_S08)...MEAN(readings.value_S20)COUNT(readings)NUM_UNIQUE(readings.WEEKDAY(timestamp))NUM_UNIQUE(readings.DAY(timestamp))NUM_UNIQUE(readings.YEAR(timestamp))NUM_UNIQUE(readings.MONTH(timestamp))MODE(readings.WEEKDAY(timestamp))MODE(readings.DAY(timestamp))MODE(readings.YEAR(timestamp))MODE(readings.MONTH(timestamp))
turbine_id
T001102204875.019558.0483068250.0486911931.0463347422.018602.0555.2499808026.03090.0465058755.0...22.406897145221141120131
T001102808505.037965.0483585662.0487487610.0467167621.034495.0719.2500401347.04970.0465669184.0...35.282759145221151220131
T001103701788.073948.0484538080.0488531121.0473938223.077804.0921.1501472849.09902.0466675578.0...53.255172145221161320131
T001104917985.087206.0486012792.0490024295.0483808936.081629.0977.2502994331.010720.0468099974.0...61.482759145221101420131
T00184328762.061778.0389879083.0396521849.0492596536.065122.0954.3403671026.08684.0375635231.0...87.315789145221111520131
\n", + "

5 rows × 165 columns

\n", + "
" + ], + "text/plain": [ + " SUM(readings.value_S09) SUM(readings.value_S01) \\\n", + "turbine_id \n", + "T001 102204875.0 19558.0 \n", + "T001 102808505.0 37965.0 \n", + "T001 103701788.0 73948.0 \n", + "T001 104917985.0 87206.0 \n", + "T001 84328762.0 61778.0 \n", + "\n", + " SUM(readings.value_S12) SUM(readings.value_S10) \\\n", + "turbine_id \n", + "T001 483068250.0 486911931.0 \n", + "T001 483585662.0 487487610.0 \n", + "T001 484538080.0 488531121.0 \n", + "T001 486012792.0 490024295.0 \n", + "T001 389879083.0 396521849.0 \n", + "\n", + " SUM(readings.value_S18) SUM(readings.value_S03) \\\n", + "turbine_id \n", + "T001 463347422.0 18602.0 \n", + "T001 467167621.0 34495.0 \n", + "T001 473938223.0 77804.0 \n", + "T001 483808936.0 81629.0 \n", + "T001 492596536.0 65122.0 \n", + "\n", + " SUM(readings.value_S16) SUM(readings.value_S11) \\\n", + "turbine_id \n", + "T001 555.2 499808026.0 \n", + "T001 719.2 500401347.0 \n", + "T001 921.1 501472849.0 \n", + "T001 977.2 502994331.0 \n", + "T001 954.3 403671026.0 \n", + "\n", + " SUM(readings.value_S21) SUM(readings.value_S08) ... \\\n", + "turbine_id ... \n", + "T001 3090.0 465058755.0 ... \n", + "T001 4970.0 465669184.0 ... \n", + "T001 9902.0 466675578.0 ... \n", + "T001 10720.0 468099974.0 ... \n", + "T001 8684.0 375635231.0 ... \n", + "\n", + " MEAN(readings.value_S20) COUNT(readings) \\\n", + "turbine_id \n", + "T001 22.406897 145 \n", + "T001 35.282759 145 \n", + "T001 53.255172 145 \n", + "T001 61.482759 145 \n", + "T001 87.315789 145 \n", + "\n", + " NUM_UNIQUE(readings.WEEKDAY(timestamp)) \\\n", + "turbine_id \n", + "T001 2 \n", + "T001 2 \n", + "T001 2 \n", + "T001 2 \n", + "T001 2 \n", + "\n", + " NUM_UNIQUE(readings.DAY(timestamp)) \\\n", + "turbine_id \n", + "T001 2 \n", + "T001 2 \n", + "T001 2 \n", + "T001 2 \n", + "T001 2 \n", + "\n", + " NUM_UNIQUE(readings.YEAR(timestamp)) \\\n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "\n", + " NUM_UNIQUE(readings.MONTH(timestamp)) \\\n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "\n", + " MODE(readings.WEEKDAY(timestamp)) MODE(readings.DAY(timestamp)) \\\n", + "turbine_id \n", + "T001 4 11 \n", + "T001 5 12 \n", + "T001 6 13 \n", + "T001 0 14 \n", + "T001 1 15 \n", + "\n", + " MODE(readings.YEAR(timestamp)) MODE(readings.MONTH(timestamp)) \n", + "turbine_id \n", + "T001 2013 1 \n", + "T001 2013 1 \n", + "T001 2013 1 \n", + "T001 2013 1 \n", + "T001 2013 1 \n", + "\n", + "[5 rows x 165 columns]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['X'].head()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "165" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# features generated (the turbine_id is set as index).\n", + "len(context['X'].columns)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## mlprimitives.custom.feature_extraction.CategoricalEncoder\n", + "\n", + "* Input: X\n", + "* Output: X (label encoded)\n", + "* Effect: encodes categorical features using OneHotLabelEncoder" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "step = 5\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": 
{}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'entityset', 'X', 'y'])" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
SUM(readings.value_S09)SUM(readings.value_S01)SUM(readings.value_S12)SUM(readings.value_S10)SUM(readings.value_S18)SUM(readings.value_S03)SUM(readings.value_S16)SUM(readings.value_S11)SUM(readings.value_S21)SUM(readings.value_S08)...MEAN(readings.value_S20)COUNT(readings)NUM_UNIQUE(readings.WEEKDAY(timestamp))NUM_UNIQUE(readings.DAY(timestamp))NUM_UNIQUE(readings.YEAR(timestamp))NUM_UNIQUE(readings.MONTH(timestamp))MODE(readings.WEEKDAY(timestamp))MODE(readings.DAY(timestamp))MODE(readings.YEAR(timestamp))MODE(readings.MONTH(timestamp))
turbine_id
T001102204875.019558.0483068250.0486911931.0463347422.018602.0555.2499808026.03090.0465058755.0...22.406897145221141120131
T001102808505.037965.0483585662.0487487610.0467167621.034495.0719.2500401347.04970.0465669184.0...35.282759145221151220131
T001103701788.073948.0484538080.0488531121.0473938223.077804.0921.1501472849.09902.0466675578.0...53.255172145221161320131
T001104917985.087206.0486012792.0490024295.0483808936.081629.0977.2502994331.010720.0468099974.0...61.482759145221101420131
T00184328762.061778.0389879083.0396521849.0492596536.065122.0954.3403671026.08684.0375635231.0...87.315789145221111520131
\n", + "

5 rows × 165 columns

\n", + "
" + ], + "text/plain": [ + " SUM(readings.value_S09) SUM(readings.value_S01) \\\n", + "turbine_id \n", + "T001 102204875.0 19558.0 \n", + "T001 102808505.0 37965.0 \n", + "T001 103701788.0 73948.0 \n", + "T001 104917985.0 87206.0 \n", + "T001 84328762.0 61778.0 \n", + "\n", + " SUM(readings.value_S12) SUM(readings.value_S10) \\\n", + "turbine_id \n", + "T001 483068250.0 486911931.0 \n", + "T001 483585662.0 487487610.0 \n", + "T001 484538080.0 488531121.0 \n", + "T001 486012792.0 490024295.0 \n", + "T001 389879083.0 396521849.0 \n", + "\n", + " SUM(readings.value_S18) SUM(readings.value_S03) \\\n", + "turbine_id \n", + "T001 463347422.0 18602.0 \n", + "T001 467167621.0 34495.0 \n", + "T001 473938223.0 77804.0 \n", + "T001 483808936.0 81629.0 \n", + "T001 492596536.0 65122.0 \n", + "\n", + " SUM(readings.value_S16) SUM(readings.value_S11) \\\n", + "turbine_id \n", + "T001 555.2 499808026.0 \n", + "T001 719.2 500401347.0 \n", + "T001 921.1 501472849.0 \n", + "T001 977.2 502994331.0 \n", + "T001 954.3 403671026.0 \n", + "\n", + " SUM(readings.value_S21) SUM(readings.value_S08) ... \\\n", + "turbine_id ... \n", + "T001 3090.0 465058755.0 ... \n", + "T001 4970.0 465669184.0 ... \n", + "T001 9902.0 466675578.0 ... \n", + "T001 10720.0 468099974.0 ... \n", + "T001 8684.0 375635231.0 ... \n", + "\n", + " MEAN(readings.value_S20) COUNT(readings) \\\n", + "turbine_id \n", + "T001 22.406897 145 \n", + "T001 35.282759 145 \n", + "T001 53.255172 145 \n", + "T001 61.482759 145 \n", + "T001 87.315789 145 \n", + "\n", + " NUM_UNIQUE(readings.WEEKDAY(timestamp)) \\\n", + "turbine_id \n", + "T001 2 \n", + "T001 2 \n", + "T001 2 \n", + "T001 2 \n", + "T001 2 \n", + "\n", + " NUM_UNIQUE(readings.DAY(timestamp)) \\\n", + "turbine_id \n", + "T001 2 \n", + "T001 2 \n", + "T001 2 \n", + "T001 2 \n", + "T001 2 \n", + "\n", + " NUM_UNIQUE(readings.YEAR(timestamp)) \\\n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "\n", + " NUM_UNIQUE(readings.MONTH(timestamp)) \\\n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "\n", + " MODE(readings.WEEKDAY(timestamp)) MODE(readings.DAY(timestamp)) \\\n", + "turbine_id \n", + "T001 4 11 \n", + "T001 5 12 \n", + "T001 6 13 \n", + "T001 0 14 \n", + "T001 1 15 \n", + "\n", + " MODE(readings.YEAR(timestamp)) MODE(readings.MONTH(timestamp)) \n", + "turbine_id \n", + "T001 2013 1 \n", + "T001 2013 1 \n", + "T001 2013 1 \n", + "T001 2013 1 \n", + "T001 2013 1 \n", + "\n", + "[5 rows x 165 columns]" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['X'].head()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idtimestampvalue_S01value_S02value_S03value_S04value_S05value_S06value_S07value_S08...value_S17value_S18value_S19value_S20value_S21value_S22value_S23value_S24value_S25value_S26
0T0012013-01-10 00:00:00323.0320.0284.0348.0273.0342.0280.03197842.0...11.73131020.055.055.047.058.045.058.047.0356.0
1T0012013-01-10 00:10:00346.0384.0367.0411.0331.0360.0249.03197900.0...10.23131420.058.063.062.067.055.061.042.0400.0
2T0012013-01-10 00:20:00407.0363.0407.0393.0275.0335.0270.03197968.0...9.53131822.068.061.067.066.046.055.045.0402.0
3T0012013-01-10 00:30:00257.0307.0315.0361.0317.0354.0271.03198011.0...10.53132179.043.051.053.062.053.060.045.0357.0
4T0012013-01-10 00:40:00267.0309.0314.0355.0262.0246.0212.03198056.0...9.63132501.045.051.054.059.043.041.036.0322.0
\n", + "

5 rows × 28 columns

\n", + "
" + ], + "text/plain": [ + " turbine_id timestamp value_S01 value_S02 value_S03 value_S04 \\\n", + "0 T001 2013-01-10 00:00:00 323.0 320.0 284.0 348.0 \n", + "1 T001 2013-01-10 00:10:00 346.0 384.0 367.0 411.0 \n", + "2 T001 2013-01-10 00:20:00 407.0 363.0 407.0 393.0 \n", + "3 T001 2013-01-10 00:30:00 257.0 307.0 315.0 361.0 \n", + "4 T001 2013-01-10 00:40:00 267.0 309.0 314.0 355.0 \n", + "\n", + " value_S05 value_S06 value_S07 value_S08 ... value_S17 value_S18 \\\n", + "0 273.0 342.0 280.0 3197842.0 ... 11.7 3131020.0 \n", + "1 331.0 360.0 249.0 3197900.0 ... 10.2 3131420.0 \n", + "2 275.0 335.0 270.0 3197968.0 ... 9.5 3131822.0 \n", + "3 317.0 354.0 271.0 3198011.0 ... 10.5 3132179.0 \n", + "4 262.0 246.0 212.0 3198056.0 ... 9.6 3132501.0 \n", + "\n", + " value_S19 value_S20 value_S21 value_S22 value_S23 value_S24 \\\n", + "0 55.0 55.0 47.0 58.0 45.0 58.0 \n", + "1 58.0 63.0 62.0 67.0 55.0 61.0 \n", + "2 68.0 61.0 67.0 66.0 46.0 55.0 \n", + "3 43.0 51.0 53.0 62.0 53.0 60.0 \n", + "4 45.0 51.0 54.0 59.0 43.0 41.0 \n", + "\n", + " value_S25 value_S26 \n", + "0 47.0 356.0 \n", + "1 42.0 400.0 \n", + "2 45.0 402.0 \n", + "3 45.0 357.0 \n", + "4 36.0 322.0 \n", + "\n", + "[5 rows x 28 columns]" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## xgboost.XGBClassifier\n", + "\n", + "* Input: X (label encoded and featurized)\n", + "* Output: None\n", + "* Effect: trained model" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "step = 6\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'entityset', 'X', 'y'])" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.12" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 0360fe2fa8b5d53f28f996e38bec8d4ca66798f8 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Fri, 23 Oct 2020 10:47:54 +0200 Subject: [PATCH 118/171] Fix mkdirs --- greenguard/benchmark.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/greenguard/benchmark.py b/greenguard/benchmark.py index eff50bc..89d1076 100644 --- a/greenguard/benchmark.py +++ b/greenguard/benchmark.py @@ -586,7 +586,8 @@ def run_benchmark(templates, problems, window_size_resample_rule=None, results = pd.concat(results, ignore_index=True) if output_path: - os.makedirs(output_path, exist_ok=True) + if os.path.dirname(output_path): + os.makedirs(os.path.dirname(output_path), exist_ok=True) results.to_csv(output_path, index=False) else: From 704ad911d15faf18943daf2b1caa7a096ed46e83 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Fri, 23 Oct 2020 10:59:48 +0200 Subject: [PATCH 119/171] Add release notes for v0.2.6 --- HISTORY.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/HISTORY.md b/HISTORY.md index 
d0c8364..ef6042e 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,14 @@ # History +## 0.2.6 - 2020-10-23 + +* Fix ``mkdir`` when exporting the benchmark results to a ``csv`` file. +* Add intermediate steps for the pipelines, with a demo notebook for each pipeline. + +### Resolved Issues + +* Issue #50: Expose partial outputs and executions in the ``GreenGuardPipeline``. + ## 0.2.5 - 2020-10-09 With this release we include:
From 5fb8e05c872b635b585163b9ab5b62320d3b08fc Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Fri, 23 Oct 2020 10:59:52 +0200 Subject: [PATCH 120/171] =?UTF-8?q?Bump=20version:=200.2.6.dev0=20?= =?UTF-8?q?=E2=86=92=200.2.6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- greenguard/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/greenguard/__init__.py b/greenguard/__init__.py index 662545d..4ff2249 100644 --- a/greenguard/__init__.py +++ b/greenguard/__init__.py @@ -4,7 +4,7 @@ __author__ = """MIT Data To AI Lab""" __email__ = 'dailabmit@gmail.com' -__version__ = '0.2.6.dev0' +__version__ = '0.2.6' import os
diff --git a/setup.cfg b/setup.cfg index 919f5d6..4a8c235 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.2.6.dev0 +current_version = 0.2.6 commit = True tag = True parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?
diff --git a/setup.py b/setup.py index 3bce783..a056e0d 100644 --- a/setup.py +++ b/setup.py @@ -111,6 +111,6 @@ test_suite='tests', tests_require=tests_require, url='https://github.com/D3-AI/GreenGuard', - version='0.2.6.dev0', + version='0.2.6', zip_safe=False, )
From bd035ec11de791205c0f1b2b44d7c0765913191a Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Fri, 23 Oct 2020 11:12:05 +0200 Subject: [PATCH 121/171] =?UTF-8?q?Bump=20version:=200.2.6=20=E2=86=92=200?= =?UTF-8?q?.2.7.dev0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- greenguard/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/greenguard/__init__.py b/greenguard/__init__.py index 4ff2249..63ff9ee 100644 --- a/greenguard/__init__.py +++ b/greenguard/__init__.py @@ -4,7 +4,7 @@ __author__ = """MIT Data To AI Lab""" __email__ = 'dailabmit@gmail.com' -__version__ = '0.2.6' +__version__ = '0.2.7.dev0' import os
diff --git a/setup.cfg b/setup.cfg index 4a8c235..86de050 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.2.6 +current_version = 0.2.7.dev0 commit = True tag = True parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?
diff --git a/setup.py b/setup.py index a056e0d..63b5ae7 100644 --- a/setup.py +++ b/setup.py @@ -111,6 +111,6 @@ test_suite='tests', tests_require=tests_require, url='https://github.com/D3-AI/GreenGuard', - version='0.2.6', + version='0.2.7.dev0', zip_safe=False, )
From 486ca5c6303c97c6a6881a59abec1e31bc3f7547 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev <41479552+pvk-developer@users.noreply.github.com> Date: Tue, 1 Dec 2020 12:13:45 +0100 Subject: [PATCH 122/171] Update dependencies, python and tox (#53) * Update requirements and test environments. * Add system dependency for xgboost. * Update the available Python versions.
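The `install-minimum` target added to the Makefile below derives the minimum supported version of every dependency by rewriting the `install_requires` specifiers from `setup.py`. As a rough sketch of what the `sed | tr` recipe computes (this helper is illustrative only and is not part of the patch):

```python
def minimum_pins(install_requires):
    # Mirror the Makefile recipe: turning every '>' into '=' rewrites the
    # '>=X' lower bounds as '==X' pins, so pip resolves the oldest
    # supported version of each dependency.
    return [req.replace('>', '=') for req in install_requires]

print(minimum_pins(['baytune>=0.3.13.dev0,<0.4', 'numpy<1.19.0,>=1.16.0']))
# ['baytune==0.3.13.dev0,<0.4', 'numpy<1.19.0,==1.16.0']
```

Running `pip install` on the rewritten specifiers then installs exactly those minimum versions, which is what the new `test-minimum` target exercises in CI.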
--- .github/workflows/tests.yml | 83 +++++++++++++++++++++++++++++++++++-- Makefile | 47 +++++++++++++-------- README.md | 2 +- setup.py | 14 +++---- tox.ini | 40 ++++++++++++------ 5 files changed, 145 insertions(+), 41 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 135d2a5..97dbb0e 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -7,25 +7,100 @@ on: branches: [ master ] jobs: - build: + lint: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: [3.6, 3.7] + python-version: [3.6, 3.7, 3.8] os: [ubuntu-latest] + steps: + - uses: actions/checkout@v1 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install tox tox-gh-actions + - name: Test with tox + run: tox -e lint + readme: + runs-on: ${{ matrix.os }} + strategy: + matrix: + python-version: [3.6, 3.7, 3.8] + os: [ubuntu-latest] steps: - uses: actions/checkout@v1 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v1 with: python-version: ${{ matrix.python-version }} + - name: Install libgomp1 + run: | + sudo apt-get install libgomp1 + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install tox tox-gh-actions + - name: Test with tox + run: tox -e readme + unit: + runs-on: ${{ matrix.os }} + strategy: + matrix: + python-version: [3.6, 3.7, 3.8] + os: [ubuntu-latest, macos-latest] + steps: + - uses: actions/checkout@v1 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - sudo apt-get install pandoc python -m pip install --upgrade pip pip install tox tox-gh-actions + - name: Test with tox + run: tox -e unit + minimum: + runs-on: ${{ matrix.os }} + strategy: + matrix: + python-version: [3.6, 3.7, 3.8] + os: [ubuntu-latest] + steps: + - uses: actions/checkout@v1 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install tox tox-gh-actions + - name: Test with tox + run: tox -e minimum + + tutorials: + runs-on: ${{ matrix.os }} + strategy: + matrix: + python-version: [3.6, 3.7, 3.8] + os: [ubuntu-latest] + steps: + - uses: actions/checkout@v1 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install tox tox-gh-actions - name: Test with tox - run: tox + run: tox -e tutorials diff --git a/Makefile b/Makefile index ea625f3..2b2d2f8 100644 --- a/Makefile +++ b/Makefile @@ -49,9 +49,7 @@ clean-pyc: ## remove Python file artifacts .PHONY: clean-docs clean-docs: ## remove previously built docs - rm -f docs/api/*.rst - rm -rf docs/tutorials - -$(MAKE) -C docs clean 2>/dev/null # this fails if sphinx is not yet installed + rm -rf docs/api/ docs/api_reference/api/ docs/tutorials docs/build docs/_build .PHONY: clean-coverage clean-coverage: ## remove coverage artifacts @@ -82,21 +80,38 @@ install-test: clean-build clean-pyc ## install the package and test dependencies install-develop: clean-build clean-pyc ## install the package in editable mode and dependencies for development pip 
install -e .[dev] +MINIMUM := $(shell sed -n '/install_requires = \[/,/]/p' setup.py | grep -v -e '[][]' | sed 's/ *\(.*\),$?$$/\1/g' | tr '>' '=') + +.PHONY: install-minimum +install-minimum: ## install the minimum supported versions of the package dependencies + pip install $(MINIMUM) + # LINT TARGETS +.PHONY: lint-greenguard +lint-greenguard: ## check style with flake8 and isort + flake8 greenguard + isort -c --recursive greenguard + +.PHONY: lint-tests +lint-tests: ## check style with flake8 and isort + flake8 --ignore=D,SFS2 tests + isort -c --recursive tests + +.PHONY: check-dependencies +check-dependencies: ## test if there are any broken dependencies + pip check + .PHONY: lint -lint: ## check style with flake8 and isort - flake8 greenguard tests - isort -c --recursive greenguard tests +lint: check-dependencies lint-greenguard lint-tests ## Run all code style and static testing validations .PHONY: fix-lint fix-lint: ## fix lint issues using autoflake, autopep8, and isort - find greenguard tests -name '*.py' | xargs autoflake --in-place --remove-all-unused-imports --remove-unused-variables - autopep8 --in-place --recursive --aggressive greenguard tests + find greenguard -name '*.py' | xargs autoflake --in-place --remove-all-unused-imports --remove-unused-variables + autopep8 --in-place --recursive --aggressive greenguard isort --apply --atomic --recursive greenguard tests - # TEST TARGETS .PHONY: test-unit @@ -111,13 +126,14 @@ test-readme: ## run the readme snippets .PHONY: test-tutorials test-tutorials: ## run the tutorial notebooks - jupyter nbconvert --execute --ExecutePreprocessor.timeout=600 tutorials/*.ipynb --stdout > /dev/null + find tutorials -path "*/.ipynb_checkpoints" -prune -false -o -name "*.ipynb" -exec \ + jupyter nbconvert --execute --ExecutePreprocessor.timeout=3600 --to=html --stdout {} > /dev/null \; .PHONY: test -test: test-unit test-readme ## test everything that needs test dependencies +test: test-unit test-readme test-tutorials ## test everything that needs test dependencies -.PHONY: test-devel -test-devel: lint docs ## test everything that needs development dependencies +.PHONY: test-minimum +test-minimum: install-minimum check-dependencies test-unit ## run tests using the minimum supported dependencies .PHONY: test-all test-all: ## run tests on every Python version with tox @@ -130,17 +146,14 @@ coverage: ## check code coverage quickly with the default Python coverage html $(BROWSER) htmlcov/index.html - # DOCS TARGETS .PHONY: docs docs: clean-docs ## generate Sphinx HTML documentation, including API docs - cp -r tutorials docs/tutorials - sphinx-apidoc --separate --no-toc -o docs/api/ greenguard $(MAKE) -C docs html .PHONY: view-docs -view-docs: docs ## view docs in browser +view-docs: ## view the docs in a browser $(BROWSER) docs/_build/html/index.html .PHONY: serve-docs diff --git a/README.md b/README.md index adbc11e..1f7551c 100644 --- a/README.md +++ b/README.md @@ -53,7 +53,7 @@ The salient aspects of this customized project are: ## Requirements -**GreenGuard** has been developed and runs on Python 3.6 and 3.7. +**GreenGuard** has been developed and runs on Python 3.6, 3.7 and 3.8.
Also, although it is not strictly required, the usage of a [virtualenv]( https://virtualenv.pypa.io/en/latest/) is highly recommended in order to avoid interfering diff --git a/setup.py b/setup.py index 63b5ae7..8837963 100644 --- a/setup.py +++ b/setup.py @@ -16,18 +16,18 @@ history = '' install_requires = [ - 'baytune>=0.3.9,<0.4', + 'baytune>=0.3.13.dev0,<0.4', 'mlblocks>=0.3.4,<0.4', - 'mlprimitives>=0.2.5,<0.3', + 'mlprimitives>=0.2.6.dev0,<0.3', 'pymongo>=3.7.2,<4', - 'scikit-learn>=0.20.0,<0.21', + 'scikit-learn>=0.21', 'tqdm<4.50.0,>=4.36.1', 'cloudpickle>=1.6,<2', 'scipy>=1.0.1,<1.4.0', - 'numpy>=1.15.4,<1.17', - 'pandas>=0.23.4,<0.25', + 'numpy<1.19.0,>=1.16.0', + 'pandas>=1,<2', 'dask>=2.6.0,<3', - 'Keras>=2.1.6,<2.4', + 'Keras>=2.4', 'tabulate>=0.8.3,<0.9', 'xlsxwriter>=1.3.6,<1.4', 'boto3==1.14.44', @@ -106,7 +106,7 @@ long_description_content_type='text/markdown', name='greenguard', packages=find_packages(include=['greenguard', 'greenguard.*']), - python_requires='>=3.6,<3.8', + python_requires='>=3.6,<3.9', setup_requires=setup_requires, test_suite='tests', tests_require=tests_require, diff --git a/tox.ini b/tox.ini index 91af938..0068931 100644 --- a/tox.ini +++ b/tox.ini @@ -1,25 +1,41 @@ +[testenv:docs] +skipsdist = true +extras = dev +commands = + /usr/bin/env make docs + [tox] -envlist = py{36,37}, test-devel +envlist = py3{6,7,8}-{lint,readme,unit,minimum} [travis] python = - 3.7: py37, test-devel - 3.6: py36 + 3.8: py38-lint, py38-readme, py38-unit, py38-minimum, py38-tutorials + 3.7: py37-lint, py37-readme, py37-unit, py37-minimum, py37-tutorials + 3.6: py36-lint, py36-readme, py36-unit, py36-minimum, py36-tutorials [gh-actions] python = - 3.7: py37, test-devel - 3.6: py36 + 3.8: py38-lint, py38-readme, py38-unit, py38-minimum, py38-tutorials + 3.7: py37-lint, py37-readme, py37-unit, py37-minimum, py37-tutorials + 3.6: py36-lint, py36-readme, py36-unit, py36-minimum, py36-tutorials [testenv] passenv = CI TRAVIS TRAVIS_* skipsdist = false skip_install = false -extras = test -commands = - /usr/bin/env make test - -[testenv:test-devel] -extras = dev +deps = + readme: rundoc + tutorials: jupyter +extras = + lint: dev + unit: test + minimum: test commands = - /usr/bin/env make test-devel + lint: /usr/bin/env make lint + readme: /usr/bin/env make test-readme + unit: /usr/bin/env make test-unit + minimum: /usr/bin/env make test-minimum + tutorials: /usr/bin/env make test-tutorials + rm -r {envdir} +whitelist_externals = + rm
From b4b4cd98639185ace7e984190e081341bf6019cf Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Tue, 1 Dec 2020 12:17:37 +0100 Subject: [PATCH 123/171] =?UTF-8?q?Bump=20version:=200.2.7.dev0=20?= =?UTF-8?q?=E2=86=92=200.2.7.dev1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- greenguard/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/greenguard/__init__.py b/greenguard/__init__.py index 63ff9ee..a027a27 100644 --- a/greenguard/__init__.py +++ b/greenguard/__init__.py @@ -4,7 +4,7 @@ __author__ = """MIT Data To AI Lab""" __email__ = 'dailabmit@gmail.com' -__version__ = '0.2.7.dev0' +__version__ = '0.2.7.dev1' import os
diff --git a/setup.cfg b/setup.cfg index 86de050..84c637d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.2.7.dev0 +current_version = 0.2.7.dev1 commit = True tag = True parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?
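The ``parse`` expression above is what ``bumpversion`` uses to split a version string such as ``0.2.7.dev1`` into its components before incrementing one of them. A quick sketch of the match, assuming the standard group names (``major``, ``minor``, ``patch``, ``release``, ``candidate``) that the stripped markup in this hunk originally carried:

```python
import re

PARSE = r'(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?'

print(re.match(PARSE, '0.2.7.dev1').groupdict())
# {'major': '0', 'minor': '2', 'patch': '7', 'release': 'dev', 'candidate': '1'}
print(re.match(PARSE, '0.2.7').groupdict()['release'])
# None -- the dev suffix is optional, so plain releases also match
```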
diff --git a/setup.py b/setup.py index 8837963..b50e4a5 100644 --- a/setup.py +++ b/setup.py @@ -111,6 +111,6 @@ test_suite='tests', tests_require=tests_require, url='https://github.com/D3-AI/GreenGuard', - version='0.2.7.dev0', + version='0.2.7.dev1', zip_safe=False, )
From 27d362a7e9f1f18635972e9c2aec93a26cd42b5a Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Tue, 1 Dec 2020 15:06:51 +0100 Subject: [PATCH 124/171] Update scipy version --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/setup.py b/setup.py index b50e4a5..6746be9 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,7 @@ 'scikit-learn>=0.21', 'tqdm<4.50.0,>=4.36.1', 'cloudpickle>=1.6,<2', - 'scipy>=1.0.1,<1.4.0', + 'scipy>=1.0.1,<2', 'numpy<1.19.0,>=1.16.0', 'pandas>=1,<2', 'dask>=2.6.0,<3',
From bf9e31ec7f9698e9c976b13dcfcfa13f0fb59dc3 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Tue, 1 Dec 2020 15:07:43 +0100 Subject: [PATCH 125/171] =?UTF-8?q?Bump=20version:=200.2.7.dev1=20?= =?UTF-8?q?=E2=86=92=200.2.7.dev2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- greenguard/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/greenguard/__init__.py b/greenguard/__init__.py index a027a27..abea0c2 100644 --- a/greenguard/__init__.py +++ b/greenguard/__init__.py @@ -4,7 +4,7 @@ __author__ = """MIT Data To AI Lab""" __email__ = 'dailabmit@gmail.com' -__version__ = '0.2.7.dev1' +__version__ = '0.2.7.dev2' import os
diff --git a/setup.cfg b/setup.cfg index 84c637d..724b591 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.2.7.dev1 +current_version = 0.2.7.dev2 commit = True tag = True parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?
diff --git a/setup.py b/setup.py index 6746be9..d3fcf1a 100644 --- a/setup.py +++ b/setup.py @@ -111,6 +111,6 @@ test_suite='tests', tests_require=tests_require, url='https://github.com/D3-AI/GreenGuard', - version='0.2.7.dev1', + version='0.2.7.dev2', zip_safe=False, )
From 007bd5e60d415b533da1c96dfa9ca6c8a44cdc17 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev <41479552+pvk-developer@users.noreply.github.com> Date: Fri, 4 Dec 2020 12:53:05 +0100 Subject: [PATCH 126/171] Add threshold (#55) * Add threshold to the GreenGuardPipeline * Update description. * Curate docstrings --- greenguard/pipeline.py | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-)
diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py index 34504e3..ba68278 100644 --- a/greenguard/pipeline.py +++ b/greenguard/pipeline.py @@ -150,6 +150,9 @@ def generate_preprocessing(templates_names, preprocessing): return preprocessing +SELF_THRESHOLD = object() + + class GreenGuardPipeline(object): """Main Machine Learning component in the GreenGuard project. @@ -228,6 +231,11 @@ class GreenGuardPipeline(object): cache_path (str): If given, cache the generated cross validation splits in this folder. Defaults to ``None``. + threshold (float): + If ``None``, return the raw predictions as given by the pipeline. If not ``None``, + use the given value as a threshold to convert the predicted probabilities into + a binary output that indicates whether each probability is greater than or equal + to the threshold. Defaults to ``None``.
""" template = None @@ -304,8 +312,9 @@ def _build_pipeline(self): self.fitted = False - def __init__(self, templates, metric='accuracy', cost=False, init_params=None, stratify=True, - cv_splits=5, shuffle=True, random_state=0, preprocessing=0, cache_path=None): + def __init__(self, templates, metric='accuracy', cost=False, init_params=None, + stratify=True, cv_splits=5, shuffle=True, random_state=0, preprocessing=0, + cache_path=None, threshold=None): if isinstance(metric, str): metric, cost = METRICS[metric] @@ -314,6 +323,7 @@ def __init__(self, templates, metric='accuracy', cost=False, init_params=None, s self._cost = cost self._cv = self._get_cv(stratify, cv_splits, shuffle, random_state) self.cv_score = np.inf if cost else -np.inf + self.threshold = threshold if not isinstance(templates, list): templates = [templates] @@ -556,7 +566,7 @@ def fit(self, target_times=None, readings=None, turbines=None, return out def predict(self, target_times=None, readings=None, turbines=None, - start_=None, output_='default', **kwargs): + start_=None, output_='default', threshold=SELF_THRESHOLD, **kwargs): """Make predictions using this pipeline. Args: @@ -567,6 +577,13 @@ def predict(self, target_times=None, readings=None, turbines=None, ``readings`` table. turbines (pandas.DataFrame): ``turbines`` table. + threshold (float): + If not given, use the threshold specified upon instance creation in the + ``__init__``. If ``None``, return the raw predictions as given by the pipeline. + If not ``None``, use the given value as a threshold to convert the predicted + probabilities into a binary output that indicates whether the probability is above + the threshold (not strict) or below the threshold (strict). + Defaults to ``self.threshold``. Returns: numpy.ndarray: @@ -576,8 +593,15 @@ def predict(self, target_times=None, readings=None, turbines=None, raise NotFittedError() X = target_times[['turbine_id', 'cutoff_time']] - return self._pipeline.predict(X, readings=readings, turbines=turbines, - start_=start_, output_=output_, **kwargs) + predictions = self._pipeline.predict(X, readings=readings, turbines=turbines, + start_=start_, output_=output_, **kwargs) + if threshold is SELF_THRESHOLD: + threshold = self.threshold + + if threshold is not None: + predictions = predictions >= threshold + + return predictions def save(self, path): """Serialize and save this pipeline using cloudpickle. 
From e360694afa0a4e7f808b176a362c11f64f7e7d7b Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev <41479552+pvk-developer@users.noreply.github.com> Date: Mon, 7 Dec 2020 17:56:43 +0100 Subject: [PATCH 127/171] Add false positive rate metric (#56) * Add integration tests * Add FPR metric * Fix tests * Fix lint py37 * Add docstring --- greenguard/metrics.py | 41 +++++++++++++++++++++++++++++++++++++++-- tests/test_metrics.py | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 78 insertions(+), 2 deletions(-) create mode 100644 tests/test_metrics.py
diff --git a/greenguard/metrics.py b/greenguard/metrics.py index 54a151e..ef1c249 100644 --- a/greenguard/metrics.py +++ b/greenguard/metrics.py @@ -1,18 +1,55 @@ # -*- coding: utf-8 -*- +import logging + import numpy as np from sklearn.metrics import ( - accuracy_score, f1_score, mean_absolute_error, mean_squared_error, r2_score) + accuracy_score, f1_score, mean_absolute_error, mean_squared_error, r2_score, roc_curve) + +LOGGER = logging.getLogger(__name__) def f1_macro(exp, obs): return f1_score(exp, obs, average='macro') +def fpr_score(ground_truth, probabilities, tpr=1): + """Compute the False Positive Rate associated with the given True Positive Rate. + + This metric computes the False Positive Rate that needs to be assumed in order + to achieve the desired True Positive Rate. + The metric is computed by finding the minimum necessary threshold to ensure + that the TPR is satisfied and then computing the associated FPR. The final output + is 1 minus the found FPR to produce a maximization score between 0 and 1. + + Args: + ground_truth (numpy.ndarray): + ``numpy.ndarray`` of the known values for the given predictions. + probabilities (numpy.ndarray): + ``numpy.ndarray`` with the generated predictions in probability. + tpr (float): + ``float`` value representing the minimum True Positive Rate + that must be satisfied. + + Returns: + float: + Value between 0 and 1, where bigger is better.
+ """ + roc_fpr, roc_tpr, roc_threshold = roc_curve(ground_truth, probabilities, pos_label=1) + try: + index = np.where(roc_tpr >= tpr)[0][0] + except: + LOGGER.warn('Could not find a threshold that satisfies the requested True Positive Rate') + index = -1 + + return 1 - roc_fpr[index] + + METRICS = { 'accuracy': (accuracy_score, False), 'f1': (f1_score, False), 'f1_macro': (f1_macro, False), 'r2': (r2_score, False), 'mse': (mean_squared_error, True), - 'mae': (mean_absolute_error, True) + 'mae': (mean_absolute_error, True), + 'fpr_score': (fpr_score, False) } diff --git a/tests/test_metrics.py b/tests/test_metrics.py new file mode 100644 index 0000000..ce14132 --- /dev/null +++ b/tests/test_metrics.py @@ -0,0 +1,39 @@ +import numpy as np + +from greenguard.metrics import fpr_score + + +def test_fpr_score_perfect_scenario(): + truth = [0, 0, 0, 1, 1, 1] + false_probs = [0.2, 0.4, 0.6] + true_probs = [0.8, 0.7, 0.9] + probs = np.concatenate([false_probs, true_probs]) + score = fpr_score(truth, probs, tpr=1) + assert score == 1 + + +def test_fpr_score_predict_over_half(): + truth = [0, 0, 0, 0, 1, 1, 1, 1] + false_probs = [0.1, 0.2, 0.4, 0.6] + true_probs = [0.5, 0.7, 0.8, 0.9] + probs = np.concatenate([false_probs, true_probs]) + score = fpr_score(truth, probs, tpr=1) + assert score == 0.75 + + +def test_fpr_score_predict_half(): + truth = [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1] + false_probs = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6] + true_probs = [0.4, 0.5, 0.6, 0.7, 0.8, 0.9] + probs = np.concatenate([false_probs, true_probs]) + score = fpr_score(truth, probs, tpr=1) + assert score == 0.5 + + +def test_fpr_score_predict_one_third(): + truth = [0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1] + false_probs = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6] + true_probs = [0.3, 0.4, 0.5, 0.7, 0.8, 0.9] + probs = np.concatenate([false_probs, true_probs]) + score = fpr_score(truth, probs, tpr=1) + assert round(score, 4) == 0.3333 From 03c9838abb494e4c8f60f3710b3f14be2eb7cc35 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev <41479552+pvk-developer@users.noreply.github.com> Date: Mon, 7 Dec 2020 18:02:23 +0100 Subject: [PATCH 128/171] Add primitives and pipelines with proba (#54) * Add primitives module for GreenGuard and required primitives for the proba pipelines. * Add pipelines using predict_proba instead of predict. * Use numpy.take * Update primitives structure, readme and get primitives. * Fix get_pipelines. 
* Add contributor --- README.md | 14 +- greenguard/__init__.py | 1 + greenguard/pipeline.py | 15 +- .../normalize_dfs_xgb_classifier.json | 0 .../unstack_dfs_xgb_classifier.json | 0 ...ack_double_lstm_timeseries_classifier.json | 0 .../unstack_lstm_timeseries_classifier.json | 0 .../unstack_normalize_dfs_xgb_classifier.json | 0 .../normalize_dfs_xgb_classifier.json | 70 ++++ .../unstack_dfs_xgb_classifier.json | 83 ++++ ...ack_double_lstm_timeseries_classifier.json | 125 ++++++ .../unstack_lstm_timeseries_classifier.json | 125 ++++++ .../unstack_normalize_dfs_xgb_classifier.json | 74 ++++ greenguard/primitives/numpy.take.json | 40 ++ .../xgboost.XGBClassifier:probabilities.json | 93 +++++ setup.py | 3 +- .../01_GreenGuard_Machine_Learning.ipynb | 377 ++++++++---------- 17 files changed, 792 insertions(+), 228 deletions(-) rename greenguard/pipelines/{ => classes}/normalize_dfs_xgb_classifier.json (100%) rename greenguard/pipelines/{ => classes}/unstack_dfs_xgb_classifier.json (100%) rename greenguard/pipelines/{ => classes}/unstack_double_lstm_timeseries_classifier.json (100%) rename greenguard/pipelines/{ => classes}/unstack_lstm_timeseries_classifier.json (100%) rename greenguard/pipelines/{ => classes}/unstack_normalize_dfs_xgb_classifier.json (100%) create mode 100644 greenguard/pipelines/probability/normalize_dfs_xgb_classifier.json create mode 100644 greenguard/pipelines/probability/unstack_dfs_xgb_classifier.json create mode 100644 greenguard/pipelines/probability/unstack_double_lstm_timeseries_classifier.json create mode 100644 greenguard/pipelines/probability/unstack_lstm_timeseries_classifier.json create mode 100644 greenguard/pipelines/probability/unstack_normalize_dfs_xgb_classifier.json create mode 100644 greenguard/primitives/numpy.take.json create mode 100644 greenguard/primitives/xgboost.XGBClassifier:probabilities.json
diff --git a/README.md b/README.md index 1f7551c..0472817 100644 --- a/README.md +++ b/README.md @@ -225,18 +225,18 @@ The returned `pipeline` variable will be a `list` containing the names of all the pipelines available in the GreenGuard system: ``` -['unstack_double_lstm_timeseries_classifier', - 'unstack_lstm_timeseries_classifier', - 'unstack_normalize_dfs_xgb_classifier', - 'unstack_dfs_xgb_classifier', - 'normalize_dfs_xgb_classifier'] +['classes.unstack_double_lstm_timeseries_classifier', + 'classes.unstack_lstm_timeseries_classifier', + 'classes.unstack_normalize_dfs_xgb_classifier', + 'classes.unstack_dfs_xgb_classifier', + 'classes.normalize_dfs_xgb_classifier'] ``` For the rest of this tutorial, we will select and use the pipeline `classes.normalize_dfs_xgb_classifier` as our template. ```python3 pipeline_name = 'classes.normalize_dfs_xgb_classifier' ``` ## 3. 
Fitting the Pipeline diff --git a/greenguard/__init__.py b/greenguard/__init__.py index abea0c2..4374a6d 100644 --- a/greenguard/__init__.py +++ b/greenguard/__init__.py @@ -12,6 +12,7 @@ _BASE_PATH = os.path.abspath(os.path.dirname(__file__)) MLBLOCKS_PIPELINES = os.path.join(_BASE_PATH, 'pipelines') +MLBLOCKS_PRIMITIVES = os.path.join(_BASE_PATH, 'primitives') __all__ = (
diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py index ba68278..156c046 100644 --- a/greenguard/pipeline.py +++ b/greenguard/pipeline.py @@ -54,7 +54,7 @@ def __setstate__(self, state): Sequential.__setstate__ = __setstate__ -def get_pipelines(pattern='', path=False, unstacked=False): +def get_pipelines(pattern='', path=False, pipeline_type='classes'): """Get the list of available pipelines. Optionally filter the names using a pattern or obtain @@ -66,9 +66,9 @@ path (bool): Whether to return a dictionary containing the pipeline paths instead of only a list with the names. - unstacked (bool): - Whether to load the pipelines that expect the readings - to be already unstacked by signal_id. Defaults to ``False``. + pipeline_type (str): + The pipeline category to filter by (`classes`, `probability` and `unstacked`). + Defaults to `classes`. Return: list or dict: @@ -77,14 +77,13 @@ names as keys and their absolute paths as values. """ pipelines = dict() - pipelines_dir = PIPELINES_DIR - if unstacked: - pipelines_dir = os.path.join(pipelines_dir, 'unstacked') + pipelines_dir = os.path.join(PIPELINES_DIR, pipeline_type) for filename in os.listdir(pipelines_dir): if filename.endswith('.json') and pattern in filename: name = os.path.basename(filename)[:-len('.json')] - pipeline_path = os.path.join(PIPELINES_DIR, filename) + name = f'{pipeline_type}.{name}' + pipeline_path = os.path.join(pipelines_dir, filename) pipelines[name] = pipeline_path if not path:
diff --git a/greenguard/pipelines/normalize_dfs_xgb_classifier.json b/greenguard/pipelines/classes/normalize_dfs_xgb_classifier.json similarity index 100% rename from greenguard/pipelines/normalize_dfs_xgb_classifier.json rename to greenguard/pipelines/classes/normalize_dfs_xgb_classifier.json diff --git a/greenguard/pipelines/unstack_dfs_xgb_classifier.json b/greenguard/pipelines/classes/unstack_dfs_xgb_classifier.json similarity index 100% rename from greenguard/pipelines/unstack_dfs_xgb_classifier.json rename to greenguard/pipelines/classes/unstack_dfs_xgb_classifier.json diff --git a/greenguard/pipelines/unstack_double_lstm_timeseries_classifier.json b/greenguard/pipelines/classes/unstack_double_lstm_timeseries_classifier.json similarity index 100% rename from greenguard/pipelines/unstack_double_lstm_timeseries_classifier.json rename to greenguard/pipelines/classes/unstack_double_lstm_timeseries_classifier.json diff --git a/greenguard/pipelines/unstack_lstm_timeseries_classifier.json b/greenguard/pipelines/classes/unstack_lstm_timeseries_classifier.json similarity index 100% rename from greenguard/pipelines/unstack_lstm_timeseries_classifier.json rename to greenguard/pipelines/classes/unstack_lstm_timeseries_classifier.json diff --git a/greenguard/pipelines/unstack_normalize_dfs_xgb_classifier.json b/greenguard/pipelines/classes/unstack_normalize_dfs_xgb_classifier.json similarity index 100% rename from greenguard/pipelines/unstack_normalize_dfs_xgb_classifier.json rename to greenguard/pipelines/classes/unstack_normalize_dfs_xgb_classifier.json
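After the reorganization above, every pipeline name is namespaced by the folder it lives in, and the new ``pipeline_type`` argument simply selects that subdirectory. Usage would look roughly as follows (the listed names follow the tutorial output shown later in this patch; ordering may differ):

```python
from greenguard import get_pipelines

# Probability pipelines return the positive-class probability instead of a label.
get_pipelines(pipeline_type='probability', pattern='dfs')
# ['probability.unstack_dfs_xgb_classifier',
#  'probability.normalize_dfs_xgb_classifier',
#  'probability.unstack_normalize_dfs_xgb_classifier']

# With path=True a {name: absolute_path} dict is returned instead of a list.
get_pipelines(pattern='lstm', path=True)
```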
diff --git a/greenguard/pipelines/probability/normalize_dfs_xgb_classifier.json b/greenguard/pipelines/probability/normalize_dfs_xgb_classifier.json new file mode 100644 index 0000000..495a5d9 --- /dev/null +++ b/greenguard/pipelines/probability/normalize_dfs_xgb_classifier.json @@ -0,0 +1,70 @@ +{ + "primitives": [ + "pandas.DataFrame.resample", + "featuretools.EntitySet.entity_from_dataframe", + "featuretools.EntitySet.normalize_entity", + "featuretools.EntitySet.normalize_entity", + "featuretools.dfs", + "mlprimitives.custom.feature_extraction.CategoricalEncoder", + "xgboost.XGBClassifier:probabilities", + "numpy.take" + ], + "init_params": { + "pandas.DataFrame.resample#1": { + "rule": "600s", + "on": "timestamp", + "groupby": [ + "turbine_id", + "signal_id" + ], + "aggregation": "mean", + "reset_index": true + }, + "featuretools.EntitySet.entity_from_dataframe#1": { + "entity_id": "readings", + "index": "reading_id", + "make_index": true, + "time_index": "timestamp" + }, + "featuretools.EntitySet.normalize_entity#1": { + "base_entity_id": "readings", + "new_entity_id": "turbines", + "index": "turbine_id", + "make_time_index": false + }, + "featuretools.EntitySet.normalize_entity#2": { + "base_entity_id": "readings", + "new_entity_id": "signals", + "index": "signal_id", + "make_time_index": false + }, + "featuretools.dfs#1": { + "target_entity": "turbines", + "index": "turbine_id", + "time_index": "cutoff_time", + "encode": false, + "max_depth": -1, + "copy": true, + "verbose": false, + "n_jobs": 1, + "training_window": "1d" + }, + "numpy.take#1": { + "indices": 1, + "axis": 1 + } + }, + "input_names": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "featuretools.EntitySet.entity_from_dataframe#1": { + "dataframe": "readings" + } + }, + "output_names": { + "pandas.DataFrame.resample#1": { + "X": "readings" + } + } +} diff --git a/greenguard/pipelines/probability/unstack_dfs_xgb_classifier.json b/greenguard/pipelines/probability/unstack_dfs_xgb_classifier.json new file mode 100644 index 0000000..aedbada --- /dev/null +++ b/greenguard/pipelines/probability/unstack_dfs_xgb_classifier.json @@ -0,0 +1,83 @@ +{ + "primitives": [ + "pandas.DataFrame.resample", + "pandas.DataFrame.unstack", + "featuretools.EntitySet.entity_from_dataframe", + "featuretools.EntitySet.entity_from_dataframe", + "featuretools.EntitySet.add_relationship", + "featuretools.dfs", + "mlprimitives.custom.feature_extraction.CategoricalEncoder", + "xgboost.XGBClassifier:probabilities", + "numpy.take" + ], + "init_params": { + "pandas.DataFrame.resample#1": { + "rule": "600s", + "on": "timestamp", + "groupby": [ + "turbine_id", + "signal_id" + ], + "aggregation": "mean", + "reset_index": false + }, + "pandas.DataFrame.unstack#1": { + "level": "signal_id", + "reset_index": true + }, + "featuretools.EntitySet.entity_from_dataframe#1": { + "entity_id": "readings", + "index": "reading_id", + "make_index": true, + "time_index": "timestamp" + }, + "featuretools.EntitySet.entity_from_dataframe#2": { + "entity_id": "turbines", + "index": "turbine_id", + "make_index": false + }, + "featuretools.EntitySet.add_relationship#1": { + "parent": "turbines", + "parent_column": "turbine_id", + "child": "readings", + "child_column": "turbine_id" + }, + "featuretools.dfs#1": { + "target_entity": "turbines", + "index": "turbine_id", + "time_index": "cutoff_time", + "encode": false, + "max_depth": -1, + "copy": true, + "verbose": true, + "n_jobs": 1, + "training_window": "1d" + }, + "numpy.take#1": { + "indices": 1, + "axis": 1 + } 
+ }, + "input_names": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + }, + "featuretools.EntitySet.entity_from_dataframe#1": { + "dataframe": "readings" + }, + "featuretools.EntitySet.entity_from_dataframe#2": { + "dataframe": "turbines" + } + }, + "output_names": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + } + } +} diff --git a/greenguard/pipelines/probability/unstack_double_lstm_timeseries_classifier.json b/greenguard/pipelines/probability/unstack_double_lstm_timeseries_classifier.json new file mode 100644 index 0000000..46f05e1 --- /dev/null +++ b/greenguard/pipelines/probability/unstack_double_lstm_timeseries_classifier.json @@ -0,0 +1,125 @@ +{ + "primitives": [ + "pandas.DataFrame.resample", + "pandas.DataFrame.unstack", + "pandas.DataFrame.pop", + "pandas.DataFrame.pop", + "sklearn.impute.SimpleImputer", + "sklearn.preprocessing.MinMaxScaler", + "pandas.DataFrame", + "pandas.DataFrame.set", + "pandas.DataFrame.set", + "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences", + "keras.Sequential.DoubleLSTMTimeSeriesClassifier", + "numpy.take" + ], + "init_params": { + "pandas.DataFrame.resample#1": { + "rule": "3600s", + "on": "timestamp", + "groupby": [ + "turbine_id", + "signal_id" + ], + "aggregation": "mean", + "reset_index": false + }, + "pandas.DataFrame.unstack#1": { + "level": "signal_id", + "reset_index": true + }, + "pandas.DataFrame.pop#1": { + "item": "turbine_id" + }, + "pandas.DataFrame.pop#2": { + "item": "timestamp" + }, + "sklearn.preprocessing.MinMaxScaler#1": { + "feature_range": [ + -1, + 1 + ] + }, + "pandas.DataFrame#1": { + "index": null, + "columns": null + }, + "pandas.DataFrame.set#1": { + "key": "turbine_id" + }, + "pandas.DataFrame.set#2": { + "key": "timestamp" + }, + "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "window_size": 24, + "cutoff_time": "cutoff_time", + "time_index": "timestamp" + }, + "keras.Sequential.DoubleLSTMTimeSeriesClassifier#1": { + "epochs": 35, + "verbose": false, + "classification": false + }, + "numpy.take#1": { + "indices": 1, + "axis": 1 + } + }, + "input_names": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + }, + "pandas.DataFrame.pop#1": { + "X": "readings" + }, + "pandas.DataFrame.pop#2": { + "X": "readings" + }, + "sklearn.impute.SimpleImputer#1": { + "X": "readings" + }, + "sklearn.preprocessing.MinMaxScaler#1": { + "X": "readings" + }, + "pandas.DataFrame#1": { + "X": "readings" + }, + "pandas.DataFrame.set#1": { + "X": "readings", + "value": "turbine_id" + }, + "pandas.DataFrame.set#2": { + "X": "readings", + "value": "timestamp" + }, + "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "timeseries": "readings" + } + }, + "output_names": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + }, + "pandas.DataFrame.pop#1": { + "item": "turbine_id" + }, + "pandas.DataFrame.pop#2": { + "item": "timestamp" + }, + "sklearn.impute.SimpleImputer#1": { + "X": "readings" + }, + "sklearn.preprocessing.MinMaxScaler#1": { + "X": "readings" + }, + "pandas.DataFrame#1": { + "X": "readings" + } + } +} diff --git a/greenguard/pipelines/probability/unstack_lstm_timeseries_classifier.json b/greenguard/pipelines/probability/unstack_lstm_timeseries_classifier.json new file mode 100644 index 0000000..34760d5 --- 
/dev/null +++ b/greenguard/pipelines/probability/unstack_lstm_timeseries_classifier.json @@ -0,0 +1,125 @@ +{ + "primitives": [ + "pandas.DataFrame.resample", + "pandas.DataFrame.unstack", + "pandas.DataFrame.pop", + "pandas.DataFrame.pop", + "sklearn.impute.SimpleImputer", + "sklearn.preprocessing.MinMaxScaler", + "pandas.DataFrame", + "pandas.DataFrame.set", + "pandas.DataFrame.set", + "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences", + "keras.Sequential.LSTMTimeSeriesClassifier", + "numpy.take" + ], + "init_params": { + "pandas.DataFrame.resample#1": { + "rule": "3600s", + "on": "timestamp", + "groupby": [ + "turbine_id", + "signal_id" + ], + "aggregation": "mean", + "reset_index": false + }, + "pandas.DataFrame.unstack#1": { + "level": "signal_id", + "reset_index": true + }, + "pandas.DataFrame.pop#1": { + "item": "turbine_id" + }, + "pandas.DataFrame.pop#2": { + "item": "timestamp" + }, + "sklearn.preprocessing.MinMaxScaler#1": { + "feature_range": [ + -1, + 1 + ] + }, + "pandas.DataFrame#1": { + "index": null, + "columns": null + }, + "pandas.DataFrame.set#1": { + "key": "turbine_id" + }, + "pandas.DataFrame.set#2": { + "key": "timestamp" + }, + "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "window_size": 24, + "cutoff_time": "cutoff_time", + "time_index": "timestamp" + }, + "keras.Sequential.LSTMTimeSeriesClassifier#1": { + "epochs": 35, + "verbose": false, + "classification": false + }, + "numpy.take#1": { + "indices": 1, + "axis": 1 + } + }, + "input_names": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + }, + "pandas.DataFrame.pop#1": { + "X": "readings" + }, + "pandas.DataFrame.pop#2": { + "X": "readings" + }, + "sklearn.impute.SimpleImputer#1": { + "X": "readings" + }, + "sklearn.preprocessing.MinMaxScaler#1": { + "X": "readings" + }, + "pandas.DataFrame#1": { + "X": "readings" + }, + "pandas.DataFrame.set#1": { + "X": "readings", + "value": "turbine_id" + }, + "pandas.DataFrame.set#2": { + "X": "readings", + "value": "timestamp" + }, + "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "timeseries": "readings" + } + }, + "output_names": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + }, + "pandas.DataFrame.pop#1": { + "item": "turbine_id" + }, + "pandas.DataFrame.pop#2": { + "item": "timestamp" + }, + "sklearn.impute.SimpleImputer#1": { + "X": "readings" + }, + "sklearn.preprocessing.MinMaxScaler#1": { + "X": "readings" + }, + "pandas.DataFrame#1": { + "X": "readings" + } + } +} diff --git a/greenguard/pipelines/probability/unstack_normalize_dfs_xgb_classifier.json b/greenguard/pipelines/probability/unstack_normalize_dfs_xgb_classifier.json new file mode 100644 index 0000000..eddddd8 --- /dev/null +++ b/greenguard/pipelines/probability/unstack_normalize_dfs_xgb_classifier.json @@ -0,0 +1,74 @@ +{ + "primitives": [ + "pandas.DataFrame.resample", + "pandas.DataFrame.unstack", + "featuretools.EntitySet.entity_from_dataframe", + "featuretools.EntitySet.normalize_entity", + "featuretools.dfs", + "mlprimitives.custom.feature_extraction.CategoricalEncoder", + "xgboost.XGBClassifier:probabilities", + "numpy.take" + ], + "init_params": { + "pandas.DataFrame.resample#1": { + "rule": "600s", + "on": "timestamp", + "groupby": [ + "turbine_id", + "signal_id" + ], + "aggregation": "mean", + "reset_index": false + }, + "pandas.DataFrame.unstack#1": { + "level": "signal_id", + 
"reset_index": true + }, + "featuretools.EntitySet.entity_from_dataframe#1": { + "entity_id": "readings", + "index": "reading_id", + "make_index": true, + "time_index": "timestamp" + }, + "featuretools.EntitySet.normalize_entity#1": { + "base_entity_id": "readings", + "new_entity_id": "turbines", + "index": "turbine_id", + "make_time_index": false + }, + "featuretools.dfs#1": { + "target_entity": "turbines", + "index": "turbine_id", + "time_index": "cutoff_time", + "encode": false, + "max_depth": -1, + "copy": true, + "verbose": false, + "n_jobs": 1, + "training_window": "1d" + }, + "numpy.take#1": { + "indices": 1, + "axis": 1 + } + }, + "input_names": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + }, + "featuretools.EntitySet.entity_from_dataframe#1": { + "dataframe": "readings" + } + }, + "output_names": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + } + } +} diff --git a/greenguard/primitives/numpy.take.json b/greenguard/primitives/numpy.take.json new file mode 100644 index 0000000..ad5e7a3 --- /dev/null +++ b/greenguard/primitives/numpy.take.json @@ -0,0 +1,40 @@ +{ + "name": "numpy.take", + "contributors": [ + "Plamen Valentinov Kolev " + ], + "documentation": "https://docs.scipy.org/doc/numpy/reference/", + "description": "Take elements from an array along an axis.", + "classifiers": { + "type": "postprocessor" + }, + "modalities": [], + "primitive": "numpy.take", + "produce": { + "args": [ + { + "name": "y", + "keyword": "a", + "type": "ndarray" + } + ], + "output": [ + { + "name": "y", + "type": "ndarray" + } + ] + }, + "hyperparameters": { + "fixed": { + "indices": { + "type": "int", + "default": 0 + }, + "axis": { + "type": "int", + "default": null + } + } + } +} diff --git a/greenguard/primitives/xgboost.XGBClassifier:probabilities.json b/greenguard/primitives/xgboost.XGBClassifier:probabilities.json new file mode 100644 index 0000000..8837381 --- /dev/null +++ b/greenguard/primitives/xgboost.XGBClassifier:probabilities.json @@ -0,0 +1,93 @@ +{ + "name": "xgboost.XGBClassifier", + "contributors": [ + "Carles Sala " + ], + "documentation": "https://xgboost.readthedocs.io/en/latest/python/python_api.html#xgboost.XGBClassifier", + "description": "Implementation of the scikit-learn API for XGBoost classification.", + "classifiers": { + "type": "estimator", + "subtype": "classifier" + }, + "modalities": [], + "primitive": "xgboost.XGBClassifier", + "fit": { + "method": "fit", + "args": [ + { + "name": "X", + "type": "ndarray" + }, + { + "name": "y", + "type": "array" + } + ] + }, + "produce": { + "method": "predict_proba", + "args": [ + { + "name": "X", + "keyword": "data", + "type": "ndarray" + } + ], + "output": [ + { + "name": "y", + "type": "array" + } + ] + }, + "hyperparameters": { + "fixed": { + "n_jobs": { + "type": "int", + "default": -1 + } + }, + "tunable": { + "n_estimators": { + "type": "int", + "default": 100, + "range": [ + 10, + 1000 + ] + }, + "max_depth": { + "type": "int", + "default": 3, + "range": [ + 3, + 10 + ] + }, + "learning_rate": { + "type": "float", + "default": 0.1, + "range": [ + 0, + 1 + ] + }, + "gamma": { + "type": "float", + "default": 0, + "range": [ + 0, + 1 + ] + }, + "min_child_weight": { + "type": "int", + "default": 1, + "range": [ + 1, + 10 + ] + } + } + } +} diff --git a/setup.py b/setup.py index d3fcf1a..ebbde1c 100644 --- a/setup.py +++ b/setup.py @@ -91,7 +91,8 @@ description='AutoML for 
Renewable Energy Industries.', entry_points={ 'mlblocks': [ - 'pipelines=greenguard:MLBLOCKS_PIPELINES' + 'pipelines=greenguard:MLBLOCKS_PIPELINES', + 'primitives=greenguard:MLBLOCKS_PRIMITIVES' ], }, extras_require={ diff --git a/tutorials/01_GreenGuard_Machine_Learning.ipynb b/tutorials/01_GreenGuard_Machine_Learning.ipynb index 7fab764..03a2aa0 100644 --- a/tutorials/01_GreenGuard_Machine_Learning.ipynb +++ b/tutorials/01_GreenGuard_Machine_Learning.ipynb @@ -36,7 +36,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -62,7 +62,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -84,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -157,7 +157,7 @@ "4 T001 2013-01-16 0" ] }, - "execution_count": 15, + "execution_count": 3, "metadata": {}, "output_type": "execute_result" } @@ -168,7 +168,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -177,7 +177,7 @@ "(353, 3)" ] }, - "execution_count": 16, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -188,7 +188,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -200,7 +200,7 @@ "dtype: object" ] }, - "execution_count": 17, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -211,7 +211,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -290,7 +290,7 @@ "4 T001 S05 2013-01-10 273.0" ] }, - "execution_count": 18, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -301,7 +301,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -310,7 +310,7 @@ "(1313540, 4)" ] }, - "execution_count": 19, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -321,7 +321,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -334,7 +334,7 @@ "dtype: object" ] }, - "execution_count": 20, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -383,7 +383,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -408,20 +408,37 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['normalize_dfs_xgb_classifier',\n", - " 'unstack_normalize_dfs_xgb_classifier',\n", - " 'unstack_dfs_xgb_classifier',\n", - " 'unstack_lstm_timeseries_classifier',\n", - " 'unstack_double_lstm_timeseries_classifier']" + "['unstacked.unstacked_normalize_dfs_xgb_classifier',\n", + " 'unstacked.unstacked_double_lstm_timeseries_classifier',\n", + " 'unstacked.unstacked_lstm_timeseries_classifier',\n", + " 'unstacked.unstacked_dfs_xgb_classifier',\n", + " 'classes.unstack_dfs_xgb_classifier',\n", + " 'classes.unstack_double_lstm_timeseries_classifier',\n", + " 'classes.normalize_dfs_xgb_classifier',\n", + " 'classes.unstack_lstm_timeseries_classifier',\n", + " 'classes.unstack_normalize_dfs_xgb_classifier',\n", + " 'disabled.resample_normalize_dfs_xgb_classifier',\n", + " 'disabled.resample_unstack_lstm_timeseries_classifier',\n", + " 'disabled.resample_unstack_normalize_dfs_xgb_classifier',\n", + " 
'disabled.normalize_dfs_xgb_classifier',\n", + " 'disabled.resample_unstack_double_lstm_timeseries_classifier',\n", + " 'disabled.resample_dfs_xgb_classifier',\n", + " 'disabled.resample_unstack_dfs_xgb_classifier',\n", + " 'disabled.dfs_xgb_classifier',\n", + " 'probability.unstack_dfs_xgb_classifier',\n", + " 'probability.unstack_double_lstm_timeseries_classifier',\n", + " 'probability.normalize_dfs_xgb_classifier',\n", + " 'probability.unstack_lstm_timeseries_classifier',\n", + " 'probability.unstack_normalize_dfs_xgb_classifier']" ] }, - "execution_count": 22, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -441,18 +458,29 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "['normalize_dfs_xgb_classifier',\n", - " 'unstack_normalize_dfs_xgb_classifier',\n", - " 'unstack_dfs_xgb_classifier']" + "['unstacked.unstacked_normalize_dfs_xgb_classifier',\n", + " 'unstacked.unstacked_dfs_xgb_classifier',\n", + " 'classes.unstack_dfs_xgb_classifier',\n", + " 'classes.normalize_dfs_xgb_classifier',\n", + " 'classes.unstack_normalize_dfs_xgb_classifier',\n", + " 'disabled.resample_normalize_dfs_xgb_classifier',\n", + " 'disabled.resample_unstack_normalize_dfs_xgb_classifier',\n", + " 'disabled.normalize_dfs_xgb_classifier',\n", + " 'disabled.resample_dfs_xgb_classifier',\n", + " 'disabled.resample_unstack_dfs_xgb_classifier',\n", + " 'disabled.dfs_xgb_classifier',\n", + " 'probability.unstack_dfs_xgb_classifier',\n", + " 'probability.normalize_dfs_xgb_classifier',\n", + " 'probability.unstack_normalize_dfs_xgb_classifier']" ] }, - "execution_count": 23, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -471,18 +499,29 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'normalize_dfs_xgb_classifier': '/home/usuario/Projects/GreenGuard/greenguard/pipelines/normalize_dfs_xgb_classifier.json',\n", - " 'unstack_normalize_dfs_xgb_classifier': '/home/usuario/Projects/GreenGuard/greenguard/pipelines/unstack_normalize_dfs_xgb_classifier.json',\n", - " 'unstack_dfs_xgb_classifier': '/home/usuario/Projects/GreenGuard/greenguard/pipelines/unstack_dfs_xgb_classifier.json'}" + "{'unstacked.unstacked_normalize_dfs_xgb_classifier': '/GreenGuard/greenguard/pipelines/unstacked/unstacked_normalize_dfs_xgb_classifier.json',\n", + " 'unstacked.unstacked_dfs_xgb_classifier': '/GreenGuard/greenguard/pipelines/unstacked/unstacked_dfs_xgb_classifier.json',\n", + " 'classes.unstack_dfs_xgb_classifier': '/GreenGuard/greenguard/pipelines/classes/unstack_dfs_xgb_classifier.json',\n", + " 'classes.normalize_dfs_xgb_classifier': '/GreenGuard/greenguard/pipelines/classes/normalize_dfs_xgb_classifier.json',\n", + " 'classes.unstack_normalize_dfs_xgb_classifier': '/GreenGuard/greenguard/pipelines/classes/unstack_normalize_dfs_xgb_classifier.json',\n", + " 'disabled.resample_normalize_dfs_xgb_classifier': '/GreenGuard/greenguard/pipelines/disabled/resample_normalize_dfs_xgb_classifier.json',\n", + " 'disabled.resample_unstack_normalize_dfs_xgb_classifier': '/GreenGuard/greenguard/pipelines/disabled/resample_unstack_normalize_dfs_xgb_classifier.json',\n", + " 'disabled.normalize_dfs_xgb_classifier': '/GreenGuard/greenguard/pipelines/disabled/normalize_dfs_xgb_classifier.json',\n", + " 'disabled.resample_dfs_xgb_classifier': 
'/GreenGuard/greenguard/pipelines/disabled/resample_dfs_xgb_classifier.json',\n", + " 'disabled.resample_unstack_dfs_xgb_classifier': '/GreenGuard/greenguard/pipelines/disabled/resample_unstack_dfs_xgb_classifier.json',\n", + " 'disabled.dfs_xgb_classifier': '/GreenGuard/greenguard/pipelines/disabled/dfs_xgb_classifier.json',\n", + " 'probability.unstack_dfs_xgb_classifier': '/GreenGuard/greenguard/pipelines/probability/unstack_dfs_xgb_classifier.json',\n", + " 'probability.normalize_dfs_xgb_classifier': '/GreenGuard/greenguard/pipelines/probability/normalize_dfs_xgb_classifier.json',\n", + " 'probability.unstack_normalize_dfs_xgb_classifier': '/GreenGuard/greenguard/pipelines/probability/unstack_normalize_dfs_xgb_classifier.json'}" ] }, - "execution_count": 24, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -513,13 +552,13 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "templates = [\n", - " 'unstack_normalize_dfs_xgb_classifier', \n", - " 'normalize_dfs_xgb_classifier'\n", + " 'classes.unstack_normalize_dfs_xgb_classifier', \n", + " 'classes.normalize_dfs_xgb_classifier'\n", "]" ] }, @@ -546,7 +585,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 14, "metadata": {}, "outputs": [], "source": [ @@ -568,7 +607,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -585,37 +624,16 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "INFO:btb.session:Obtaining default configuration for unstack_normalize_dfs_xgb_classifier\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Built 165 features\n", - "Elapsed: 00:34 | Progress: 100%|██████████\n", - "Elapsed: 00:18 | Progress: 100%|██████████\n", - "Built 165 features\n", - "Elapsed: 00:36 | Progress: 100%|██████████\n", - "Elapsed: 00:17 | Progress: 100%|██████████\n", - "Built 165 features\n", - "Elapsed: 00:38 | Progress: 100%|██████████\n", - "Elapsed: 00:17 | Progress: 100%|██████████\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ + "INFO:btb.session:Obtaining default configuration for classes.unstack_normalize_dfs_xgb_classifier\n", "INFO:greenguard.pipeline:New configuration found:\n", - " Template: unstack_normalize_dfs_xgb_classifier \n", + " Template: classes.unstack_normalize_dfs_xgb_classifier \n", " Hyperparameters: \n", " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 0\n", " ('xgboost.XGBClassifier#1', 'n_estimators'): 100\n", @@ -623,60 +641,29 @@ " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.1\n", " ('xgboost.XGBClassifier#1', 'gamma'): 0.0\n", " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1\n", - "INFO:btb.session:New optimal found: unstack_normalize_dfs_xgb_classifier - 0.605187908496732\n", - "INFO:btb.session:Obtaining default configuration for normalize_dfs_xgb_classifier\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Built 99 features\n", - "Elapsed: 01:44 | Progress: 100%|██████████\n", - "Elapsed: 00:52 | Progress: 100%|██████████\n", - "Built 99 features\n", - "Elapsed: 01:38 | Progress: 100%|██████████\n", - "Elapsed: 00:52 | Progress: 100%|██████████\n", - "Built 99 features\n", - "Elapsed: 01:39 | Progress: 100%|██████████\n", - "Elapsed: 00:49 | Progress: 100%|██████████\n" - ] - 
}, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "INFO:btb.session:Generating new proposal configuration for unstack_normalize_dfs_xgb_classifier\n", - "INFO:greenguard.pipeline:New configuration found:\n", - " Template: unstack_normalize_dfs_xgb_classifier \n", - " Hyperparameters: \n", - " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 20\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 234\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 3\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.23028782510751677\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.9403975339570728\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1\n", - "INFO:btb.session:New optimal found: unstack_normalize_dfs_xgb_classifier - 0.6106037764640573\n", - "INFO:btb.session:Generating new proposal configuration for normalize_dfs_xgb_classifier\n", - "INFO:btb.session:Generating new proposal configuration for unstack_normalize_dfs_xgb_classifier\n" + "INFO:btb.session:New optimal found: classes.unstack_normalize_dfs_xgb_classifier - 0.611234532127027\n", + "INFO:btb.session:Obtaining default configuration for classes.normalize_dfs_xgb_classifier\n", + "INFO:btb.session:Generating new proposal configuration for classes.unstack_normalize_dfs_xgb_classifier\n", + "INFO:btb.session:Generating new proposal configuration for classes.normalize_dfs_xgb_classifier\n", + "INFO:btb.session:Generating new proposal configuration for classes.normalize_dfs_xgb_classifier\n" ] }, { "data": { "text/plain": [ - "{'id': '28d8ebbde404a0e501262a652c4d9aa5',\n", - " 'name': 'unstack_normalize_dfs_xgb_classifier',\n", + "{'id': 'afc8e912142bc6c384231600df9874fc',\n", + " 'name': 'classes.unstack_normalize_dfs_xgb_classifier',\n", " 'config': {('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 20,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 234,\n", + " 'max_labels'): 0,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 100,\n", " ('xgboost.XGBClassifier#1', 'max_depth'): 3,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.23028782510751677,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.9403975339570728,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.1,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.0,\n", " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1},\n", - " 'score': 0.6106037764640573}" + " 'score': 0.611234532127027}" ] }, - "execution_count": 28, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -695,25 +682,25 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{'id': '28d8ebbde404a0e501262a652c4d9aa5',\n", - " 'name': 'unstack_normalize_dfs_xgb_classifier',\n", + "{'id': 'afc8e912142bc6c384231600df9874fc',\n", + " 'name': 'classes.unstack_normalize_dfs_xgb_classifier',\n", " 'config': {('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 20,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 234,\n", + " 'max_labels'): 0,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 100,\n", " ('xgboost.XGBClassifier#1', 'max_depth'): 3,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.23028782510751677,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.9403975339570728,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.1,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.0,\n", " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1},\n", - " 
'score': 0.6106037764640573}" + " 'score': 0.611234532127027}" ] }, - "execution_count": 29, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -731,22 +718,22 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 20,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 234,\n", + " 'max_labels'): 0,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 100,\n", " ('xgboost.XGBClassifier#1', 'max_depth'): 3,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.23028782510751677,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.9403975339570728,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.1,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.0,\n", " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1}" ] }, - "execution_count": 30, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -764,16 +751,16 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'unstack_normalize_dfs_xgb_classifier'" + "'classes.unstack_normalize_dfs_xgb_classifier'" ] }, - "execution_count": 31, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -792,16 +779,16 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.6106037764640573" + "0.611234532127027" ] }, - "execution_count": 32, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -821,61 +808,51 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "INFO:btb.session:Generating new proposal configuration for normalize_dfs_xgb_classifier\n", - "INFO:btb.session:Generating new proposal configuration for unstack_normalize_dfs_xgb_classifier\n", - "INFO:btb.session:Generating new proposal configuration for normalize_dfs_xgb_classifier\n", - "INFO:btb.session:Generating new proposal configuration for unstack_normalize_dfs_xgb_classifier\n", - "INFO:greenguard.pipeline:New configuration found:\n", - " Template: unstack_normalize_dfs_xgb_classifier \n", - " Hyperparameters: \n", - " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 80\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 32\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 10\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.11814847201162682\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.9589332448610124\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 6\n", - "INFO:btb.session:New optimal found: unstack_normalize_dfs_xgb_classifier - 0.640497737556561\n", - "INFO:btb.session:Generating new proposal configuration for normalize_dfs_xgb_classifier\n", - "INFO:btb.session:Generating new proposal configuration for unstack_normalize_dfs_xgb_classifier\n", + "INFO:btb.session:Generating new proposal configuration for classes.unstack_normalize_dfs_xgb_classifier\n", + "INFO:btb.session:Generating new proposal configuration for classes.unstack_normalize_dfs_xgb_classifier\n", "INFO:greenguard.pipeline:New configuration found:\n", - " Template: unstack_normalize_dfs_xgb_classifier \n", + " Template: classes.unstack_normalize_dfs_xgb_classifier \n", " Hyperparameters: \n", - " 
('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 98\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 34\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 3\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.3652063328881058\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.8627183599656656\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 6\n", - "INFO:btb.session:New optimal found: unstack_normalize_dfs_xgb_classifier - 0.6592605156037993\n", - "INFO:btb.session:Generating new proposal configuration for normalize_dfs_xgb_classifier\n", - "INFO:btb.session:Generating new proposal configuration for unstack_normalize_dfs_xgb_classifier\n", - "INFO:btb.session:Generating new proposal configuration for normalize_dfs_xgb_classifier\n", - "INFO:btb.session:Generating new proposal configuration for unstack_normalize_dfs_xgb_classifier\n" + " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 97\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 364\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 7\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.6635800510691365\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.9852977392614163\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 2\n", + "INFO:btb.session:New optimal found: classes.unstack_normalize_dfs_xgb_classifier - 0.6379648413546719\n", + "INFO:btb.session:Generating new proposal configuration for classes.normalize_dfs_xgb_classifier\n", + "INFO:btb.session:Generating new proposal configuration for classes.unstack_normalize_dfs_xgb_classifier\n", + "INFO:btb.session:Generating new proposal configuration for classes.normalize_dfs_xgb_classifier\n", + "INFO:btb.session:Generating new proposal configuration for classes.unstack_normalize_dfs_xgb_classifier\n", + "INFO:btb.session:Generating new proposal configuration for classes.normalize_dfs_xgb_classifier\n", + "INFO:btb.session:Generating new proposal configuration for classes.normalize_dfs_xgb_classifier\n", + "INFO:btb.session:Generating new proposal configuration for classes.unstack_normalize_dfs_xgb_classifier\n", + "INFO:btb.session:Generating new proposal configuration for classes.normalize_dfs_xgb_classifier\n" ] }, { "data": { "text/plain": [ - "{'id': 'f6b410d303a1cfeafdcfe0dbcf330767',\n", - " 'name': 'unstack_normalize_dfs_xgb_classifier',\n", + "{'id': '7e6de03286fd71179e2a2f7b3f089ffb',\n", + " 'name': 'classes.unstack_normalize_dfs_xgb_classifier',\n", " 'config': {('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 98,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 34,\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 3,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.3652063328881058,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.8627183599656656,\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 6},\n", - " 'score': 0.6592605156037993}" + " 'max_labels'): 97,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 364,\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 7,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.6635800510691365,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.9852977392614163,\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 2},\n", + " 'score': 0.6379648413546719}" ] }, - "execution_count": 33, + "execution_count": 21, "metadata": {}, "output_type": "execute_result" } @@ -886,16 +863,16 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 22, "metadata": {}, "outputs": [ { 
"data": { "text/plain": [ - "0.6592605156037993" + "0.6379648413546719" ] }, - "execution_count": 34, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -906,22 +883,22 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 98,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 34,\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 3,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.3652063328881058,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.8627183599656656,\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 6}" + " 'max_labels'): 97,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 364,\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 7,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.6635800510691365,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.9852977392614163,\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 2}" ] }, - "execution_count": 35, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -945,18 +922,9 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 24, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Built 165 features\n", - "Elapsed: 00:39 | Progress: 100%|██████████\n" - ] - } - ], + "outputs": [], "source": [ "pipeline.fit(train, readings)" ] @@ -972,17 +940,9 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 25, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Elapsed: 00:14 | Progress: 100%|██████████\n" - ] - } - ], + "outputs": [], "source": [ "predictions = pipeline.predict(test, readings)" ] @@ -996,16 +956,16 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.693877551020408" + "0.7346938775510203" ] }, - "execution_count": 38, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -1034,7 +994,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ @@ -1053,7 +1013,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -1069,23 +1029,16 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 29, "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Elapsed: 00:14 | Progress: 100%|██████████\n" - ] - }, { "data": { "text/plain": [ "array([0, 0, 0, 1, 0])" ] }, - "execution_count": 41, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -1112,7 +1065,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.9" + "version": "3.7.9" } }, "nbformat": 4, From 5d9da875e87dad85038dc904e018cc797966ff14 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Wed, 9 Dec 2020 22:44:17 +0100 Subject: [PATCH 129/171] Change loss to binary --- .../unstack_double_lstm_timeseries_classifier.json | 3 ++- .../probability/unstack_lstm_timeseries_classifier.json | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/greenguard/pipelines/probability/unstack_double_lstm_timeseries_classifier.json b/greenguard/pipelines/probability/unstack_double_lstm_timeseries_classifier.json index 
46f05e1..ea48a87 100644 --- a/greenguard/pipelines/probability/unstack_double_lstm_timeseries_classifier.json +++ b/greenguard/pipelines/probability/unstack_double_lstm_timeseries_classifier.json @@ -58,7 +58,8 @@ "keras.Sequential.DoubleLSTMTimeSeriesClassifier#1": { "epochs": 35, "verbose": false, - "classification": false + "classification": false, + "loss": "keras.losses.binary_crossentropy" }, "numpy.take#1": { "indices": 1, diff --git a/greenguard/pipelines/probability/unstack_lstm_timeseries_classifier.json b/greenguard/pipelines/probability/unstack_lstm_timeseries_classifier.json index 34760d5..9272257 100644 --- a/greenguard/pipelines/probability/unstack_lstm_timeseries_classifier.json +++ b/greenguard/pipelines/probability/unstack_lstm_timeseries_classifier.json @@ -55,10 +55,11 @@ "cutoff_time": "cutoff_time", "time_index": "timestamp" }, - "keras.Sequential.DoubleLSTMTimeSeriesClassifier#1": { + "keras.Sequential.LSTMTimeSeriesClassifier#1": { "epochs": 35, "verbose": false, - "classification": false + "classification": false, + "loss": "keras.losses.binary_crossentropy" }, "numpy.take#1": { "indices": 1, From d644d1e84059fa1853d5bbe90905667c0032f300 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev <41479552+pvk-developer@users.noreply.github.com> Date: Fri, 22 Jan 2021 11:20:52 +0100 Subject: [PATCH 130/171] Benchmark Upgrade (#57) * Fix bug: set default instead of get * Fix dependencies * add tuning metric * Add additional arguments to the command line. * Rename metrics * Fix test * Fix dependency requirement * add parse type float * TPR and Threshold * Add partd and fsspec dependencies --- greenguard/benchmark.py | 260 ++++++++++++++++++++++++++++++++-------- greenguard/metrics.py | 37 +++++- greenguard/pipeline.py | 1 + greenguard/results.py | 15 +-- setup.py | 12 +- tests/test_benchmark.py | 49 ++++---- 6 files changed, 288 insertions(+), 86 deletions(-) diff --git a/greenguard/benchmark.py b/greenguard/benchmark.py index 89d1076..f638138 100644 --- a/greenguard/benchmark.py +++ b/greenguard/benchmark.py @@ -1,5 +1,6 @@ import argparse import logging +import multiprocessing as mp import os import pickle import re @@ -16,12 +17,14 @@ from greenguard import get_pipelines from greenguard.demo import load_demo from greenguard.loaders import CSVLoader -from greenguard.metrics import METRICS +from greenguard.metrics import (METRICS, accuracy_score, f1_score, + fpr_score, tpr_score, threshold_score) from greenguard.pipeline import GreenGuardPipeline, generate_init_params, generate_preprocessing from greenguard.results import load_results, write_results LOGGER = logging.getLogger(__name__) +DEFAULT_TUNING_METRIC_KWARGS = {'threshold': 0.5} LEADERBOARD_COLUMNS = [ 'problem_name', 'window_size', @@ -31,7 +34,8 @@ 'default_cv', 'tuned_cv', 'tuned_test', - 'metric', + 'tuning_metric', + 'tuning_metric_kwargs', 'fit_predict_time', 'default_cv_time', 'average_cv_time', @@ -40,6 +44,25 @@ ] +def _scorer(metric, metric_args): + if isinstance(metric, str): + metric, cost = METRICS[metric] + + def f(expected, observed): + try: + return metric(expected, observed, **metric_args) + except TypeError: + if 'threshold' not in metric_args: + raise + + kwargs = metric_args.copy() + threshold = kwargs.pop('threshold') + observed = observed >= threshold + return metric(expected, observed, **kwargs) + + return f + + def _build_init_params(template, window_size, rule, template_params): if 'dfs' in template: window_size_rule_params = { @@ -61,15 +84,31 @@ def _build_init_params(template, 
window_size, rule, template_params):
     }
 
     for primitive, params in window_size_rule_params.items():
-        primitive_params = template_params.get(primitive, {})
+        primitive_params = template_params.setdefault(primitive, {})
         primitive_params.update(params)
 
     return template_params
 
 
-def evaluate_template(template, target_times, readings, metric='f1', tuning_iterations=50,
-                      preprocessing=0, init_params=None, cost=False, test_size=0.25,
-                      cv_splits=3, random_state=0, cache_path=None):
+def evaluate_template(
+    template,
+    target_times,
+    readings,
+    tuning_iterations=50,
+    init_params=None,
+    preprocessing=0,
+    metrics=None,
+    threshold=None,
+    tpr=None,
+    tuning_metric='roc_auc_score',
+    tuning_metric_kwargs=DEFAULT_TUNING_METRIC_KWARGS,
+    cost=False,
+    cv_splits=3,
+    test_size=0.25,
+    random_state=0,
+    cache_path=None,
+    scores=None
+):
     """Returns the scores for a given template.
 
     Args:
@@ -119,18 +158,15 @@ def evaluate_template(template, target_times, readings, metric='f1', tuning_iter
         Stores the four types of scores that are being evaluated.
     """
     start_time = datetime.utcnow()
-
-    scores = dict()
-    scores['metric'] = metric
+    scores = dict() if scores is None else scores
+    scores['tuning_metric'] = str(tuning_metric)
+    scores['tuning_metric_kwargs'] = tuning_metric_kwargs
+    tuning_metric = _scorer(tuning_metric, tuning_metric_kwargs)
 
     train, test = train_test_split(target_times, test_size=test_size, random_state=random_state)
 
-    if isinstance(metric, str):
-        metric, cost = METRICS[metric]
-
     pipeline = GreenGuardPipeline(
         template,
-        metric,
+        metric=tuning_metric,
         cost=cost,
         cv_splits=cv_splits,
         init_params=init_params,
@@ -144,7 +180,7 @@ def evaluate_template(template, target_times, readings, metric='f1', tuning_iter
     predictions = pipeline.predict(test, readings)
     fit_predict_time = datetime.utcnow() - fit_predict_time
 
-    scores['default_test'] = metric(test['target'], predictions)
+    scores['default_test'] = tuning_metric(test['target'], predictions)
 
     # Computing the default cross validation score
     default_cv_time = datetime.utcnow()
@@ -157,28 +193,90 @@ def evaluate_template(template, target_times, readings, metric='f1', tuning_iter
     # Computing the cross validation score with tuned hyperparameters
     average_cv_time = datetime.utcnow()
     session.run(tuning_iterations)
-    average_cv_time = (average_cv_time - datetime.utcnow()) / tuning_iterations
+    average_cv_time = (datetime.utcnow() - average_cv_time) / tuning_iterations
 
     scores['tuned_cv'] = pipeline.cv_score
 
     # Computing the test score with tuned hyperparameters
     pipeline.fit(train, readings)
     predictions = pipeline.predict(test, readings)
+    ground_truth = test['target']
+
+    # Computing the additional threshold and TPR based metrics
+    if tpr:
+        tpr = tpr if isinstance(tpr, list) else [tpr]
+        for value in tpr:
+            threshold = threshold_score(ground_truth, predictions, value)
+            scores[f'fpr_tpr/{value}'] = fpr_score(ground_truth, predictions, tpr=value)
+            predictions_classes = predictions >= threshold
+            scores[f'accuracy_tpr/{value}'] = accuracy_score(ground_truth, predictions_classes)
+            scores[f'f1_tpr/{value}'] = f1_score(ground_truth, predictions_classes)
+            scores[f'threshold_tpr/{value}'] = threshold
+
+            if f'accuracy_tpr/{value}' not in LEADERBOARD_COLUMNS:
+                LEADERBOARD_COLUMNS.extend([
+                    f'accuracy_tpr/{value}',
+                    f'f1_tpr/{value}',
+                    f'fpr_tpr/{value}',
+                    f'threshold_tpr/{value}',
+                ])
+
+    else:
+        threshold = 0.5 if threshold is None else threshold
+        threshold = threshold if isinstance(threshold, list) else [threshold]
+
+        for value in threshold:
+            scores[f'fpr_threshold/{value}'] = fpr_score(
+                ground_truth, predictions, threshold=value)
+
+            predictions_classes = predictions >= value
+            scores[f'accuracy_threshold/{value}'] = accuracy_score(
+                ground_truth, predictions_classes)
+
+            scores[f'f1_threshold/{value}'] = f1_score(ground_truth, predictions_classes)
+            scores[f'tpr_threshold/{value}'] = tpr_score(ground_truth, predictions, value)
+
+            if f'accuracy_threshold/{value}' not in LEADERBOARD_COLUMNS:
+                LEADERBOARD_COLUMNS.extend([
+                    f'accuracy_threshold/{value}',
+                    f'f1_threshold/{value}',
+                    f'fpr_threshold/{value}',
+                    f'tpr_threshold/{value}',
+                ])
 
-    scores['tuned_test'] = metric(test['target'], predictions)
+    scores['tuned_test'] = tuning_metric(test['target'], predictions)
     scores['fit_predict_time'] = fit_predict_time
     scores['default_cv_time'] = default_cv_time
-    scores['default_cv_time'] = default_cv_time
     scores['average_cv_time'] = average_cv_time
     scores['total_time'] = datetime.utcnow() - start_time
 
     return scores
 
 
-def evaluate_templates(templates, window_size_rule, metric='f1', tuning_iterations=50,
-                       init_params=None, target_times=None, readings=None, preprocessing=0,
-                       cost=False, test_size=0.25, cv_splits=3, random_state=0, cache_path=None,
-                       cache_results=None, problem_name=None, output_path=None, progress_bar=None):
+def evaluate_templates(
+    templates,
+    window_size_rule,
+    tuning_iterations=50,
+    init_params=None,
+    preprocessing=0,
+    metrics=None,
+    threshold=None,
+    tpr=None,
+    tuning_metric='roc_auc_score',
+    tuning_metric_kwargs=DEFAULT_TUNING_METRIC_KWARGS,
+    target_times=None,
+    readings=None,
+    cost=False,
+    test_size=0.25,
+    cv_splits=3,
+    random_state=0,
+    cache_path=None,
+    cache_results=None,
+    problem_name=None,
+    output_path=None,
+    progress_bar=None,
+    multiprocess=False
+):
     """Execute the benchmark process and optionally store the result as a ``CSV``.
 
     Args:
@@ -272,11 +370,6 @@ def evaluate_templates(templates, window_size_rule, metric='f1', tuning_iteratio
 
     for template, window_rule in product(templates, window_size_rule):
         window_size, rule = window_rule
-        scores = dict()
-        scores['problem_name'] = problem_name
-        scores['template'] = template
-        scores['window_size'] = window_size
-        scores['resample_rule'] = rule
 
         try:
             LOGGER.info('Evaluating template %s on problem %s (%s, %s)',
@@ -285,24 +378,66 @@ def evaluate_templates(templates, window_size_rule, metric='f1', tuning_iteratio
             template_params = init_params[template]
             template_params = _build_init_params(template, window_size, rule, template_params)
             template_preprocessing = preprocessing[template]
+            if multiprocess:
+                manager = mp.Manager()
+                scores = manager.dict()
+                process = mp.Process(
+                    target=evaluate_template,
+                    args=(
+                        template,
+                        target_times,
+                        readings,
+                        tuning_iterations,
+                        init_params,
+                        preprocessing,
+                        metrics,
+                        threshold,
+                        tpr,
+                        tuning_metric,
+                        tuning_metric_kwargs,
+                        cost,
+                        cv_splits,
+                        test_size,
+                        random_state,
+                        cache_path,
+                        scores
+                    )
+                )
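+
+                # Running evaluate_template in a child process keeps a hard
+                # crash inside a pipeline primitive from taking down the whole
+                # benchmark: if the child dies before reporting, 'tuned_test'
+                # never reaches the shared dict and the run is marked ERRORED.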
+ + else: + scores = dict() + scores['problem_name'] = problem_name + scores['template'] = template + scores['window_size'] = window_size + scores['resample_rule'] = rule + result = evaluate_template( + template=template, + target_times=target_times, + readings=readings, + metrics=metrics, + tuning_metric=tuning_metric, + tuning_metric_kwargs=tuning_metric_kwargs, + threshold=threshold, + tpr=tpr, + tuning_iterations=tuning_iterations, + preprocessing=template_preprocessing, + init_params=template_params, + cost=cost, + test_size=test_size, + cv_splits=cv_splits, + random_state=random_state, + cache_path=cache_path + ) - scores.update(result) - scores['status'] = 'OK' + scores.update(result) + scores['status'] = 'OK' except Exception: scores['status'] = 'ERRORED' @@ -420,8 +555,10 @@ def make_problems(target_times_paths, readings_path, window_size_resample_rule, def run_benchmark(templates, problems, window_size_resample_rule=None, tuning_iterations=50, signals=None, preprocessing=0, init_params=None, - metric='f1', cost=False, cv_splits=5, test_size=0.33, random_state=0, - cache_path=None, cache_results=None, output_path=None): + metrics=None, threshold=None, tpr=None, tuning_metric='roc_auc_score', + tuning_metric_kwargs=DEFAULT_TUNING_METRIC_KWARGS, cost=False, cv_splits=5, + test_size=0.33, random_state=0, cache_path=None, cache_results=None, + output_path=None, multiprocess=False): """Execute the benchmark function and optionally store the result as a ``CSV``. This function provides a user-friendly interface to interact with the ``evaluate_templates`` @@ -551,8 +688,10 @@ def run_benchmark(templates, problems, window_size_resample_rule=None, df = evaluate_templates( templates, [(window_size, resample_rule)], - metric=metric, + metrics=metrics, tuning_iterations=tuning_iterations, + threshold=threshold, + tpr=tpr, init_params=init_params, target_times=target_times, readings=readings, @@ -565,7 +704,8 @@ def run_benchmark(templates, problems, window_size_resample_rule=None, cache_results=cache_results, problem_name=problem_name, output_path=None, - progress_bar=pbar + progress_bar=pbar, + multiprocess=multiprocess, ) results.append(df) @@ -618,13 +758,23 @@ def _run(args): for item in args.window_size_resample_rule ] + if args.tuning_metric_kwargs: + args.tuning_metric_kwargs = json.loads(args.tuning_metric_kwargs) + + else: + args.tuning_metric_kwargs = DEFAULT_TUNING_METRIC_KWARGS + # run results = run_benchmark( templates=args.templates, problems=args.problems, window_size_resample_rule=window_size_resample_rule, cv_splits=args.cv_splits, - metric=args.metric, + metrics=args.metrics, + threshold=args.threshold, + tpr=args.tpr, + tuning_metric=args.tuning_metric, + tuning_metric_kwargs=args.tuning_metric_kwargs, test_size=args.test_size, random_state=args.random_state, cache_path=args.cache_path, @@ -632,6 +782,7 @@ def _run(args): tuning_iterations=args.iterations, output_path=args.output_path, signals=args.signals, + multiprocess=args.multiprocess ) if not args.output_path: @@ -699,8 +850,12 @@ def _get_parser(): help='Output path where to store the results.') run.add_argument('-s', '--cv-splits', type=int, default=5, help='Amount of cross validation splits to use.') - run.add_argument('-m', '--metric', type=str, default='f1', - help='Name of metric function to be used during benchmarking.') + run.add_argument('-m', '--metrics', nargs='+', + help='Names of metric functions to be used for the benchmarking.') + run.add_argument('-T', '--threshold', nargs='+', type=float, + 
help='Threshold values for the metrics.')
+    run.add_argument('-P', '--tpr', nargs='+', type=float,
+                     help='TPR values for the metrics; if provided, threshold will be ignored.')
     run.add_argument('-n', '--random-state', type=int, default=0,
                      help='Random state for the cv splits.')
     run.add_argument('-e', '--test-size', type=float, default=0.33,
@@ -713,6 +868,13 @@ def _get_parser():
                      help='Number of iterations to perform per challenge with each candidate.')
     run.add_argument('-S', '--signals', type=str,
                      help='Path to csv file that has signal_id column to use as the signal')
+    run.add_argument('-k', '--tuning-metric', type=str, default='roc_auc_score',
+                     help='Tuning metric to be used.')
+    run.add_argument('-K', '--tuning-metric-kwargs', type=str,
+                     help='Tuning metric kwargs to be used with the metric, as a JSON string.')
+    run.add_argument('-u', '--multiprocess', action='store_true',
+                     help='Whether or not to spawn a separate process and avoid crashing.')
+
 
     # Summarize action
     summary = action.add_parser('summarize-results',
diff --git a/greenguard/metrics.py b/greenguard/metrics.py
index ef1c249..6f50d32 100644
--- a/greenguard/metrics.py
+++ b/greenguard/metrics.py
@@ -2,8 +2,8 @@
 import logging
 
 import numpy as np
-from sklearn.metrics import (
-    accuracy_score, f1_score, mean_absolute_error, mean_squared_error, roc_curve, r2_score)
+from sklearn.metrics import (accuracy_score, f1_score, mean_absolute_error,
+                             mean_squared_error, roc_curve, roc_auc_score, r2_score)
 
 LOGGER = logging.getLogger(__name__)
 
@@ -12,7 +12,29 @@ def f1_macro(exp, obs):
     return f1_score(exp, obs, average='macro')
 
 
-def fpr_score(ground_truth, probabilities, tpr=1):
+def threshold_score(ground_truth, probabilities, tpr):
+    roc_fpr, roc_tpr, roc_threshold = roc_curve(ground_truth, probabilities, pos_label=1)
+    try:
+        index = np.where(roc_tpr >= tpr)[0][0]
+    except IndexError:
+        LOGGER.warning('Could not find a threshold that satisfies the requested True Positive Rate')
+        index = -1
+
+    return roc_threshold[index]
+
+
+def tpr_score(ground_truth, probabilities, threshold):
+    roc_fpr, roc_tpr, roc_threshold = roc_curve(ground_truth, probabilities, pos_label=1)
+    try:
+        index = np.where(roc_threshold >= threshold)[0][0]
+    except IndexError:
+        LOGGER.warning('Could not find a TPR that satisfies the requested threshold')
+        index = -1
+
+    return roc_tpr[index]
+
+
+def fpr_score(ground_truth, probabilities, tpr=None, threshold=None):
     """Compute the False Positive Rate associated with the given True Positive Rate.
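+
+    Example (illustrative): with the sample data from scikit-learn's
+    ``roc_curve`` documentation, requiring a perfect TPR of 1 forces a
+    false positive rate of 0.5:
+
+        >>> fpr_score([0, 0, 1, 1], [0.1, 0.4, 0.35, 0.8], tpr=1)
+        0.5
+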
     This metric computes the False Positive Rate that needs to be assumed in order
@@ -36,7 +58,11 @@ def fpr_score(ground_truth, probabilities, tpr=1):
     """
     roc_fpr, roc_tpr, roc_threshold = roc_curve(ground_truth, probabilities, pos_label=1)
     try:
-        index = np.where(roc_tpr >= tpr)[0][0]
+        if tpr is not None:
+            index = np.where(roc_tpr >= tpr)[0][0]
+        elif threshold is not None:
+            index = np.where(roc_threshold >= threshold)[0][0]
+
     except:
         LOGGER.warn('Could not find a threshold that satisfies the requested True Positive Rate')
         index = -1
@@ -51,5 +77,6 @@
     'r2': (r2_score, False),
     'mse': (mean_squared_error, True),
     'mae': (mean_absolute_error, True),
-    'fpr_score': (fpr_score, False)
+    'fpr': (fpr_score, False),
+    'roc_auc_score': (roc_auc_score, False)
 }
diff --git a/greenguard/pipeline.py b/greenguard/pipeline.py
index 156c046..2a9cd84 100644
--- a/greenguard/pipeline.py
+++ b/greenguard/pipeline.py
@@ -403,6 +403,7 @@ def _generate_splits(self, template_name, target_times, readings, turbines=None)
         if self._cache_path:
             split_name = '{}_{}.pkl'.format(template_name, fold)
             split_path = os.path.join(self._cache_path, split_name)
+            os.makedirs(os.path.dirname(split_path), exist_ok=True)
 
             with open(split_path, 'wb') as split_file:
                 pickle.dump(split, split_file)
diff --git a/greenguard/results.py b/greenguard/results.py
index 869c26d..bbe4165 100644
--- a/greenguard/results.py
+++ b/greenguard/results.py
@@ -1,4 +1,5 @@
 import os
+from random import random
 
 import pandas as pd
 
@@ -7,15 +8,15 @@ def load_results(files):
     problems_results = dict()
     for filename in files:
         problem = os.path.basename(filename).replace('.csv', '')
-        problems_results[problem] = pd.read_csv(filename, index_col=0).round(6)
+        problems_results[problem] = pd.read_csv(filename).round(6)
 
     return problems_results
 
 
 def get_wins_by_problems(results):
-    df = results.groupby('problem_name')['template', 'window_size', 'resample_rule', 'tuned_test']
+    df = results.groupby('problem_name')[['template', 'window_size', 'resample_rule', 'fpr_threshold/0.5']]
     df = df.apply(max)
-    df = df.rename(columns={'tuned_test': 'score'})
+    df = df.rename(columns={'fpr_threshold/0.5': 'score'})
 
     return df
 
@@ -25,8 +26,8 @@ def get_exclusive_wins(scores, column, pivot_columns=['window_size', 'resample_r
     for problem in scores.problem_name.unique():
         df = scores[scores['problem_name'] == problem]
         df['wr'] = df.apply(
-            lambda row: '{}_{}'.format(row[pivot_columns[0]], row[pivot_columns[1]]), axis=1)
-        df = df.pivot(index='wr', columns=column, values='tuned_test')
+            lambda row: '{}_{}_{}'.format(row[pivot_columns[0]], row[pivot_columns[1]], random()), axis=1)
+        df = df.pivot(index='wr', columns=column, values='fpr_threshold/0.5')
 
         is_winner = df.T.rank(method='min', ascending=False) == 1
         num_winners = is_winner.sum()
@@ -93,9 +94,9 @@ def write_results(results, output):
     if isinstance(results, dict):
         results = pd.concat(list(results.values()), ignore_index=True)
 
-    window = get_exclusive_wins(results, 'window_size', ['window_size', 'tuned_test'])
+    window = get_exclusive_wins(results, 'window_size', ['window_size', 'fpr_threshold/0.5'])
 
-    resample_pivots = ['resample_rule', ['problem_name', 'tuned_test']]
+    resample_pivots = ['resample_rule', ['problem_name', 'fpr_threshold/0.5']]
     resample = get_exclusive_wins(results, 'resample_rule', resample_pivots)
 
     summary = {
diff --git a/setup.py b/setup.py
index ebbde1c..cefe9da 100644
--- a/setup.py
+++ b/setup.py
@@ -16,9 +16,9 @@
 history = ''
 
 install_requires = [
-    'baytune>=0.3.13.dev0,<0.4',
-    
'mlblocks>=0.3.4,<0.4', - 'mlprimitives>=0.2.6.dev0,<0.3', + 'baytune>=0.4.0,<0.5', + 'mlprimitives>=0.3.0,<0.4', + 'mlblocks>=0.4.0,<0.5', 'pymongo>=3.7.2,<4', 'scikit-learn>=0.21', 'tqdm<4.50.0,>=4.36.1', @@ -26,12 +26,14 @@ 'scipy>=1.0.1,<2', 'numpy<1.19.0,>=1.16.0', 'pandas>=1,<2', + 'partd>=1.1.0,<2', + 'fsspec>=0.8.5,<0.9', 'dask>=2.6.0,<3', + 'distributed>=2.6.0,<3', + 'h5py<2.11.0,>=2.10.0', # fix tensorflow requirement 'Keras>=2.4', 'tabulate>=0.8.3,<0.9', 'xlsxwriter>=1.3.6<1.4', - 'boto3==1.14.44', - 'botocore==1.17.44', ] setup_requires = [ diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index a1a2d6f..ac87cc2 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -1,5 +1,5 @@ """Tests for `greenguard.benchmark` module.""" -from sklearn.metrics import f1_score +import numpy as np from greenguard.benchmark import evaluate_templates from greenguard.demo import load_demo @@ -8,7 +8,7 @@ def test_predict(): # setup templates = [ - 'unstack_lstm_timeseries_classifier' + 'probability.unstack_lstm_timeseries_classifier' ] window_size_rule = [ @@ -16,7 +16,7 @@ def test_predict(): ] target_times, readings = load_demo() - target_times = target_times.head(10) + target_times = target_times.head(40) readings = readings.head(100) # run @@ -25,7 +25,6 @@ def test_predict(): readings=readings, templates=templates, window_size_rule=window_size_rule, - metric=f1_score, tuning_iterations=1, cv_splits=2 ) @@ -40,29 +39,39 @@ def test_predict(): 'default_cv', 'tuned_cv', 'tuned_test', - 'metric', + 'tuning_metric', + 'tuning_metric_kwargs', 'fit_predict_time', 'default_cv_time', 'average_cv_time', 'total_time', - 'status' + 'status', + 'accuracy_threshold/0.5', + 'f1_threshold/0.5', + 'fpr_threshold/0.5', + 'tpr_threshold/0.5', ] expected_dtypes = [ - 'object', - 'object', - 'object', - 'object', - 'float64', - 'float64', - 'float64', - 'float64', - 'float64', - 'float64', - 'float64', - 'float64', - 'float64', - 'object', + np.dtype('O'), + np.dtype('O'), + np.dtype('O'), + np.dtype('O'), + np.dtype('float64'), + np.dtype('float64'), + np.dtype('float64'), + np.dtype('float64'), + np.dtype('O'), + np.dtype('O'), + np.dtype(' Date: Fri, 22 Jan 2021 15:20:36 +0100 Subject: [PATCH 131/171] Update notebooks with the new pipelines. 
--- tutorials/03_Benchmarking.ipynb | 312 ++++--- .../normalize_dfs_xgb_classifier.ipynb | 760 +++++++++--------- ...ck_double_lstm_timeseries_classifier.ipynb | 26 +- .../unstack_lstm_timeseries_classifier.ipynb | 26 +- ...unstack_normalize_dfs_xgb_classifier.ipynb | 676 ++++++++-------- 5 files changed, 896 insertions(+), 904 deletions(-) diff --git a/tutorials/03_Benchmarking.ipynb b/tutorials/03_Benchmarking.ipynb index ee765a5..56e8701 100644 --- a/tutorials/03_Benchmarking.ipynb +++ b/tutorials/03_Benchmarking.ipynb @@ -75,8 +75,11 @@ "metadata": {}, "outputs": [], "source": [ - "templates = ['unstack_lstm_timeseries_classifier', 'normalize_dfs_xgb_classifier']\n", - "window_size_rule = [('1d', '1h'), ('3d', '4h')]\n", + "templates = [\n", + " 'probability.unstack_lstm_timeseries_classifier',\n", + " 'probability.normalize_dfs_xgb_classifier'\n", + "]\n", + "window_size_rule = [('1d', '1h'), ('2d', '2h')]\n", "init_params = {\n", " 'unstack_lstm_timeseries_classifier': {\n", " 'keras.Sequential.LSTMTimeSeriesClassifier#1': {\n", @@ -97,124 +100,90 @@ "name": "stderr", "output_type": "stream", "text": [ - "Using TensorFlow backend.\n", + "INFO:greenguard.benchmark:Evaluating template probability.unstack_lstm_timeseries_classifier on problem None (1d, 1h)\n", "INFO:greenguard.pipeline:New configuration found:\n", - " Template: unstack_lstm_timeseries_classifier \n", + " Template: probability.unstack_lstm_timeseries_classifier \n", " Hyperparameters: \n", " ('sklearn.impute.SimpleImputer#1', 'strategy'): mean\n", " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'lstm_1_units'): 80\n", " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.3\n", " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dense_1_units'): 80\n", "INFO:greenguard.pipeline:New configuration found:\n", - " Template: unstack_lstm_timeseries_classifier \n", + " Template: probability.unstack_lstm_timeseries_classifier \n", + " Hyperparameters: \n", + " ('sklearn.impute.SimpleImputer#1', 'strategy'): constant\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'lstm_1_units'): 287\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.565737233372491\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dense_1_units'): 145\n", + "INFO:greenguard.pipeline:New configuration found:\n", + " Template: probability.unstack_lstm_timeseries_classifier \n", + " Hyperparameters: \n", + " ('sklearn.impute.SimpleImputer#1', 'strategy'): constant\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'lstm_1_units'): 269\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.5973752345055594\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dense_1_units'): 412\n", + "INFO:greenguard.benchmark:Evaluating template probability.unstack_lstm_timeseries_classifier on problem None (2d, 2h)\n", + "INFO:greenguard.pipeline:New configuration found:\n", + " Template: probability.unstack_lstm_timeseries_classifier \n", " Hyperparameters: \n", " ('sklearn.impute.SimpleImputer#1', 'strategy'): mean\n", " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'lstm_1_units'): 80\n", " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.3\n", " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dense_1_units'): 80\n", "INFO:greenguard.pipeline:New configuration found:\n", - " Template: unstack_lstm_timeseries_classifier \n", + " Template: probability.unstack_lstm_timeseries_classifier \n", " Hyperparameters: \n", - " ('sklearn.impute.SimpleImputer#1', 'strategy'): 
median\n", - " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'lstm_1_units'): 353\n", - " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.4718077136146996\n", - " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dense_1_units'): 151\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Built 99 features\n", - "Elapsed: 02:58 | Progress: 100%|██████████\n", - "Elapsed: 00:58 | Progress: 100%|██████████\n", - "Built 99 features\n", - "Elapsed: 01:54 | Progress: 100%|██████████\n", - "Elapsed: 01:08 | Progress: 100%|██████████\n", - "Built 99 features\n", - "Elapsed: 02:20 | Progress: 100%|██████████\n", - "Elapsed: 01:09 | Progress: 100%|██████████\n", - "Built 99 features\n", - "Elapsed: 02:16 | Progress: 100%|██████████\n", - "Elapsed: 01:07 | Progress: 100%|██████████\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ + " ('sklearn.impute.SimpleImputer#1', 'strategy'): mean\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'lstm_1_units'): 114\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.016427744327526084\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dense_1_units'): 224\n", + "INFO:greenguard.benchmark:Evaluating template probability.normalize_dfs_xgb_classifier on problem None (1d, 1h)\n", "INFO:greenguard.pipeline:New configuration found:\n", - " Template: normalize_dfs_xgb_classifier \n", + " Template: probability.normalize_dfs_xgb_classifier \n", " Hyperparameters: \n", " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 0\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 100\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 3\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.1\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.0\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1\n", + " ('xgboost.XGBClassifier:probabilities#1', 'n_estimators'): 100\n", + " ('xgboost.XGBClassifier:probabilities#1', 'max_depth'): 3\n", + " ('xgboost.XGBClassifier:probabilities#1', 'learning_rate'): 0.1\n", + " ('xgboost.XGBClassifier:probabilities#1', 'gamma'): 0.0\n", + " ('xgboost.XGBClassifier:probabilities#1', 'min_child_weight'): 1\n", "INFO:greenguard.pipeline:New configuration found:\n", - " Template: normalize_dfs_xgb_classifier \n", + " Template: probability.normalize_dfs_xgb_classifier \n", " Hyperparameters: \n", - " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 18\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 920\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 10\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.02731362750079913\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.46258174821600884\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 3\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Built 99 features\n", - "Elapsed: 03:18 | Progress: 100%|██████████\n", - "Elapsed: 01:03 | Progress: 100%|██████████\n", - "Built 99 features\n", - "Elapsed: 03:15 | Progress: 100%|██████████\n", - "Elapsed: 01:06 | Progress: 100%|██████████\n", - "Built 99 features\n", - "Elapsed: 02:05 | Progress: 100%|██████████\n", - "Elapsed: 01:10 | Progress: 100%|██████████\n", - "Built 99 features\n", - "Elapsed: 01:51 | Progress: 100%|██████████\n", - "Elapsed: 00:54 | Progress: 100%|██████████\n", - "Built 99 features\n", - "Elapsed: 01:51 | Progress: 100%|██████████\n", - "Elapsed: 00:58 | Progress: 100%|██████████\n" - ] - }, - { - "name": "stderr", - 
"output_type": "stream", - "text": [ + " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 11\n", + " ('xgboost.XGBClassifier:probabilities#1', 'n_estimators'): 231\n", + " ('xgboost.XGBClassifier:probabilities#1', 'max_depth'): 9\n", + " ('xgboost.XGBClassifier:probabilities#1', 'learning_rate'): 0.554989010368875\n", + " ('xgboost.XGBClassifier:probabilities#1', 'gamma'): 0.909957492053926\n", + " ('xgboost.XGBClassifier:probabilities#1', 'min_child_weight'): 7\n", "INFO:greenguard.pipeline:New configuration found:\n", - " Template: normalize_dfs_xgb_classifier \n", + " Template: probability.normalize_dfs_xgb_classifier \n", + " Hyperparameters: \n", + " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 61\n", + " ('xgboost.XGBClassifier:probabilities#1', 'n_estimators'): 122\n", + " ('xgboost.XGBClassifier:probabilities#1', 'max_depth'): 5\n", + " ('xgboost.XGBClassifier:probabilities#1', 'learning_rate'): 0.6840927016151666\n", + " ('xgboost.XGBClassifier:probabilities#1', 'gamma'): 0.5480298094360865\n", + " ('xgboost.XGBClassifier:probabilities#1', 'min_child_weight'): 6\n", + "INFO:greenguard.benchmark:Evaluating template probability.normalize_dfs_xgb_classifier on problem None (2d, 2h)\n", + "INFO:greenguard.pipeline:New configuration found:\n", + " Template: probability.normalize_dfs_xgb_classifier \n", " Hyperparameters: \n", " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 0\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 100\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 3\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.1\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.0\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1\n", + " ('xgboost.XGBClassifier:probabilities#1', 'n_estimators'): 100\n", + " ('xgboost.XGBClassifier:probabilities#1', 'max_depth'): 3\n", + " ('xgboost.XGBClassifier:probabilities#1', 'learning_rate'): 0.1\n", + " ('xgboost.XGBClassifier:probabilities#1', 'gamma'): 0.0\n", + " ('xgboost.XGBClassifier:probabilities#1', 'min_child_weight'): 1\n", "INFO:greenguard.pipeline:New configuration found:\n", - " Template: normalize_dfs_xgb_classifier \n", + " Template: probability.normalize_dfs_xgb_classifier \n", " Hyperparameters: \n", - " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 7\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 348\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 3\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.5272082810065426\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.04014402178038856\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 2\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Built 99 features\n", - "Elapsed: 02:42 | Progress: 100%|██████████\n", - "Elapsed: 01:00 | Progress: 100%|██████████\n" + " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 99\n", + " ('xgboost.XGBClassifier:probabilities#1', 'n_estimators'): 616\n", + " ('xgboost.XGBClassifier:probabilities#1', 'max_depth'): 8\n", + " ('xgboost.XGBClassifier:probabilities#1', 'learning_rate'): 0.0700166745838724\n", + " ('xgboost.XGBClassifier:probabilities#1', 'gamma'): 0.40990340522001234\n", + " ('xgboost.XGBClassifier:probabilities#1', 'min_child_weight'): 10\n" ] } ], @@ -256,77 +225,156 @@ " \n", " \n", " \n", - " template\n", + " problem_name\n", " window_size\n", " resample_rule\n", + " template\n", " default_test\n", " default_cv\n", " 
tuned_cv\n", " tuned_test\n", + " tuning_metric\n", + " tuning_metric_kwargs\n", + " fit_predict_time\n", + " default_cv_time\n", + " average_cv_time\n", + " total_time\n", " status\n", + " accuracy_threshold/0.5\n", + " f1_threshold/0.5\n", + " fpr_threshold/0.5\n", + " tpr_threshold/0.5\n", " \n", " \n", " \n", " \n", " 0\n", - " unstack_lstm_timeseries_classifier\n", + " None\n", " 1d\n", " 1h\n", - " 0.711864\n", - " 0.646437\n", - " 0.646437\n", - " 0.666667\n", + " probability.unstack_lstm_timeseries_classifier\n", + " 0.350122\n", + " 0.538316\n", + " 0.618558\n", + " 0.463675\n", + " roc_auc_score\n", + " {'threshold': 0.5}\n", + " 0 days 00:00:04.250012\n", + " 0 days 00:00:14.374875\n", + " 0 days 00:00:15.360015\n", + " 0 days 00:01:10.806375\n", " OK\n", + " 0.640449\n", + " 0.058824\n", + " 1.0\n", + " 0.0\n", " \n", " \n", " 1\n", - " unstack_lstm_timeseries_classifier\n", - " 3d\n", - " 4h\n", - " 0.703704\n", - " 0.577295\n", - " 0.616052\n", - " 0.709677\n", + " None\n", + " 2d\n", + " 2h\n", + " probability.unstack_lstm_timeseries_classifier\n", + " 0.686203\n", + " 0.491949\n", + " 0.556803\n", + " 0.510989\n", + " roc_auc_score\n", + " {'threshold': 0.5}\n", + " 0 days 00:00:04.410682\n", + " 0 days 00:00:14.411205\n", + " 0 days 00:00:10.633619\n", + " 0 days 00:00:55.011304\n", " OK\n", + " 0.595506\n", + " 0.307692\n", + " 1.0\n", + " 0.0\n", " \n", " \n", " 2\n", - " normalize_dfs_xgb_classifier\n", + " None\n", " 1d\n", " 1h\n", - " 0.581818\n", - " 0.619698\n", - " 0.646750\n", - " 0.631579\n", + " probability.normalize_dfs_xgb_classifier\n", + " 0.697802\n", + " 0.669508\n", + " 0.701792\n", + " 0.766789\n", + " roc_auc_score\n", + " {'threshold': 0.5}\n", + " 0 days 00:01:11.416859\n", + " 0 days 00:02:55.012078\n", + " 0 days 00:00:00.806430\n", + " 0 days 00:05:20.653100\n", " OK\n", + " 0.797753\n", + " 0.666667\n", + " 1.0\n", + " 0.0\n", " \n", " \n", " 3\n", - " normalize_dfs_xgb_classifier\n", - " 3d\n", - " 4h\n", - " 0.581818\n", - " 0.619698\n", - " 0.637957\n", - " 0.642857\n", + " None\n", + " 2d\n", + " 2h\n", + " probability.normalize_dfs_xgb_classifier\n", + " 0.720391\n", + " 0.718617\n", + " 0.740664\n", + " 0.782662\n", + " roc_auc_score\n", + " {'threshold': 0.5}\n", + " 0 days 00:01:03.612676\n", + " 0 days 00:02:26.925796\n", + " 0 days 00:00:00.755424\n", + " 0 days 00:04:37.570182\n", " OK\n", + " 0.820225\n", + " 0.692308\n", + " 1.0\n", + " 0.0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " template window_size resample_rule default_test \\\n", - "0 unstack_lstm_timeseries_classifier 1d 1h 0.711864 \n", - "1 unstack_lstm_timeseries_classifier 3d 4h 0.703704 \n", - "2 normalize_dfs_xgb_classifier 1d 1h 0.581818 \n", - "3 normalize_dfs_xgb_classifier 3d 4h 0.581818 \n", + " problem_name window_size resample_rule \\\n", + "0 None 1d 1h \n", + "1 None 2d 2h \n", + "2 None 1d 1h \n", + "3 None 2d 2h \n", + "\n", + " template default_test default_cv \\\n", + "0 probability.unstack_lstm_timeseries_classifier 0.350122 0.538316 \n", + "1 probability.unstack_lstm_timeseries_classifier 0.686203 0.491949 \n", + "2 probability.normalize_dfs_xgb_classifier 0.697802 0.669508 \n", + "3 probability.normalize_dfs_xgb_classifier 0.720391 0.718617 \n", + "\n", + " tuned_cv tuned_test tuning_metric tuning_metric_kwargs \\\n", + "0 0.618558 0.463675 roc_auc_score {'threshold': 0.5} \n", + "1 0.556803 0.510989 roc_auc_score {'threshold': 0.5} \n", + "2 0.701792 0.766789 roc_auc_score {'threshold': 0.5} \n", + "3 0.740664 0.782662 roc_auc_score {'threshold': 
0.5} \n", + "\n", + " fit_predict_time default_cv_time average_cv_time \\\n", + "0 0 days 00:00:04.250012 0 days 00:00:14.374875 0 days 00:00:15.360015 \n", + "1 0 days 00:00:04.410682 0 days 00:00:14.411205 0 days 00:00:10.633619 \n", + "2 0 days 00:01:11.416859 0 days 00:02:55.012078 0 days 00:00:00.806430 \n", + "3 0 days 00:01:03.612676 0 days 00:02:26.925796 0 days 00:00:00.755424 \n", + "\n", + " total_time status accuracy_threshold/0.5 f1_threshold/0.5 \\\n", + "0 0 days 00:01:10.806375 OK 0.640449 0.058824 \n", + "1 0 days 00:00:55.011304 OK 0.595506 0.307692 \n", + "2 0 days 00:05:20.653100 OK 0.797753 0.666667 \n", + "3 0 days 00:04:37.570182 OK 0.820225 0.692308 \n", "\n", - " default_cv tuned_cv tuned_test status \n", - "0 0.646437 0.646437 0.666667 OK \n", - "1 0.577295 0.616052 0.709677 OK \n", - "2 0.619698 0.646750 0.631579 OK \n", - "3 0.619698 0.637957 0.642857 OK " + " fpr_threshold/0.5 tpr_threshold/0.5 \n", + "0 1.0 0.0 \n", + "1 1.0 0.0 \n", + "2 1.0 0.0 \n", + "3 1.0 0.0 " ] }, "execution_count": 4, @@ -355,7 +403,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.11" + "version": "3.6.12" } }, "nbformat": 4, diff --git a/tutorials/pipelines/normalize_dfs_xgb_classifier.ipynb b/tutorials/pipelines/normalize_dfs_xgb_classifier.ipynb index 5bcb1ea..5fc510e 100644 --- a/tutorials/pipelines/normalize_dfs_xgb_classifier.ipynb +++ b/tutorials/pipelines/normalize_dfs_xgb_classifier.ipynb @@ -11,15 +11,7 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using TensorFlow backend.\n" - ] - } - ], + "outputs": [], "source": [ "from greenguard.demo import load_demo\n", "\n", @@ -32,7 +24,7 @@ "metadata": {}, "outputs": [], "source": [ - "pipeline_name = 'normalize_dfs_xgb_classifier'" + "pipeline_name = 'classes.normalize_dfs_xgb_classifier'" ] }, { @@ -759,27 +751,27 @@ " \n", " \n", " \n", - " SUM(readings.value)\n", - " STD(readings.value)\n", + " COUNT(readings)\n", " MAX(readings.value)\n", - " SKEW(readings.value)\n", - " MIN(readings.value)\n", " MEAN(readings.value)\n", - " COUNT(readings)\n", - " NUM_UNIQUE(readings.signal_id)\n", + " MIN(readings.value)\n", " MODE(readings.signal_id)\n", - " NUM_UNIQUE(readings.DAY(timestamp))\n", + " NUM_UNIQUE(readings.signal_id)\n", + " SKEW(readings.value)\n", + " STD(readings.value)\n", + " SUM(readings.value)\n", + " MODE(readings.DAY(timestamp))\n", " ...\n", - " MEAN(readings.signals.NUM_UNIQUE(readings.MONTH(timestamp)))\n", - " MEAN(readings.signals.NUM_UNIQUE(readings.WEEKDAY(timestamp)))\n", - " NUM_UNIQUE(readings.signals.MODE(readings.MONTH(timestamp)))\n", - " NUM_UNIQUE(readings.signals.MODE(readings.DAY(timestamp)))\n", - " NUM_UNIQUE(readings.signals.MODE(readings.YEAR(timestamp)))\n", - " NUM_UNIQUE(readings.signals.MODE(readings.WEEKDAY(timestamp)))\n", - " MODE(readings.signals.MODE(readings.MONTH(timestamp)))\n", - " MODE(readings.signals.MODE(readings.DAY(timestamp)))\n", - " MODE(readings.signals.MODE(readings.YEAR(timestamp)))\n", - " MODE(readings.signals.MODE(readings.WEEKDAY(timestamp)))\n", + " SKEW(readings.signals.NUM_UNIQUE(readings.WEEKDAY(timestamp)))\n", + " SKEW(readings.signals.NUM_UNIQUE(readings.YEAR(timestamp)))\n", + " STD(readings.signals.NUM_UNIQUE(readings.DAY(timestamp)))\n", + " STD(readings.signals.NUM_UNIQUE(readings.MONTH(timestamp)))\n", + " STD(readings.signals.NUM_UNIQUE(readings.WEEKDAY(timestamp)))\n", + " 
STD(readings.signals.NUM_UNIQUE(readings.YEAR(timestamp)))\n", + " SUM(readings.signals.NUM_UNIQUE(readings.DAY(timestamp)))\n", + " SUM(readings.signals.NUM_UNIQUE(readings.MONTH(timestamp)))\n", + " SUM(readings.signals.NUM_UNIQUE(readings.WEEKDAY(timestamp)))\n", + " SUM(readings.signals.NUM_UNIQUE(readings.YEAR(timestamp)))\n", " \n", " \n", " turbine_id\n", @@ -809,123 +801,123 @@ " \n", " \n", " T001\n", - " 3.457475e+09\n", - " 1.456852e+06\n", + " 3744\n", " 3448719.0\n", - " 1.019212\n", + " 917107.079193\n", " 0.0\n", - " 917102.224456\n", - " 3770\n", - " 26\n", " S01\n", - " 2\n", - " ...\n", - " 1\n", - " 2\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", + " 26\n", + " 1.019214\n", + " 1.456860e+06\n", + " 3.433649e+09\n", " 11\n", - " 2013\n", - " 4\n", + " ...\n", + " 0\n", + " 0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 7488\n", + " 3744\n", + " 7488\n", + " 3744\n", " \n", " \n", " T001\n", - " 3.465358e+09\n", - " 1.459852e+06\n", + " 3744\n", " 3453777.0\n", - " 1.018760\n", + " 919201.162179\n", " 0.0\n", - " 919193.186021\n", - " 3770\n", - " 26\n", " S01\n", - " 2\n", - " ...\n", - " 1\n", - " 2\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", + " 26\n", + " 1.018761\n", + " 1.459865e+06\n", + " 3.441489e+09\n", " 12\n", - " 2013\n", - " 5\n", + " ...\n", + " 0\n", + " 0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 7488\n", + " 3744\n", + " 7488\n", + " 3744\n", " \n", " \n", " T001\n", - " 3.479406e+09\n", - " 1.465252e+06\n", + " 3744\n", " 3463880.0\n", - " 1.018192\n", + " 922935.352244\n", " 2.7\n", - " 922919.430027\n", - " 3770\n", - " 26\n", " S01\n", - " 2\n", - " ...\n", - " 1\n", - " 2\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", + " 26\n", + " 1.018192\n", + " 1.465277e+06\n", + " 3.455470e+09\n", " 13\n", - " 2013\n", - " 6\n", + " ...\n", + " 0\n", + " 0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 7488\n", + " 3744\n", + " 7488\n", + " 3744\n", " \n", " \n", " T001\n", - " 3.499427e+09\n", - " 1.473308e+06\n", + " 3744\n", " 3474703.0\n", - " 1.017664\n", + " 928248.092869\n", " -1.0\n", - " 928229.883899\n", - " 3770\n", - " 26\n", " S01\n", - " 2\n", - " ...\n", - " 1\n", - " 2\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", + " 26\n", + " 1.017666\n", + " 1.473337e+06\n", + " 3.475361e+09\n", " 14\n", - " 2013\n", + " ...\n", " 0\n", + " 0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 7488\n", + " 3744\n", + " 7488\n", + " 3744\n", " \n", " \n", " T001\n", - " 2.912289e+09\n", - " 1.477955e+06\n", + " 3744\n", " 3485019.0\n", - " 1.031879\n", + " 924186.531200\n", " 0.0\n", - " 924242.895144\n", - " 3770\n", - " 26\n", " S01\n", - " 2\n", - " ...\n", - " 1\n", - " 2\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", + " 26\n", + " 1.032002\n", + " 1.477958e+06\n", + " 2.888083e+09\n", " 15\n", - " 2013\n", - " 1\n", + " ...\n", + " 0\n", + " 0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 7488\n", + " 3744\n", + " 7488\n", + " 3744\n", " \n", " \n", "\n", @@ -933,117 +925,125 @@ "" ], "text/plain": [ - " SUM(readings.value) STD(readings.value) MAX(readings.value) \\\n", - "turbine_id \n", - "T001 3.457475e+09 1.456852e+06 3448719.0 \n", - "T001 3.465358e+09 1.459852e+06 3453777.0 \n", - "T001 3.479406e+09 1.465252e+06 3463880.0 \n", - "T001 3.499427e+09 1.473308e+06 3474703.0 \n", - "T001 2.912289e+09 1.477955e+06 3485019.0 \n", + " COUNT(readings) MAX(readings.value) MEAN(readings.value) \\\n", + "turbine_id \n", + "T001 3744 3448719.0 917107.079193 \n", + "T001 
3744 3453777.0 919201.162179 \n", + "T001 3744 3463880.0 922935.352244 \n", + "T001 3744 3474703.0 928248.092869 \n", + "T001 3744 3485019.0 924186.531200 \n", "\n", - " SKEW(readings.value) MIN(readings.value) MEAN(readings.value) \\\n", - "turbine_id \n", - "T001 1.019212 0.0 917102.224456 \n", - "T001 1.018760 0.0 919193.186021 \n", - "T001 1.018192 2.7 922919.430027 \n", - "T001 1.017664 -1.0 928229.883899 \n", - "T001 1.031879 0.0 924242.895144 \n", + " MIN(readings.value) MODE(readings.signal_id) \\\n", + "turbine_id \n", + "T001 0.0 S01 \n", + "T001 0.0 S01 \n", + "T001 2.7 S01 \n", + "T001 -1.0 S01 \n", + "T001 0.0 S01 \n", "\n", - " COUNT(readings) NUM_UNIQUE(readings.signal_id) \\\n", - "turbine_id \n", - "T001 3770 26 \n", - "T001 3770 26 \n", - "T001 3770 26 \n", - "T001 3770 26 \n", - "T001 3770 26 \n", + " NUM_UNIQUE(readings.signal_id) SKEW(readings.value) \\\n", + "turbine_id \n", + "T001 26 1.019214 \n", + "T001 26 1.018761 \n", + "T001 26 1.018192 \n", + "T001 26 1.017666 \n", + "T001 26 1.032002 \n", "\n", - " MODE(readings.signal_id) NUM_UNIQUE(readings.DAY(timestamp)) ... \\\n", - "turbine_id ... \n", - "T001 S01 2 ... \n", - "T001 S01 2 ... \n", - "T001 S01 2 ... \n", - "T001 S01 2 ... \n", - "T001 S01 2 ... \n", + " STD(readings.value) SUM(readings.value) \\\n", + "turbine_id \n", + "T001 1.456860e+06 3.433649e+09 \n", + "T001 1.459865e+06 3.441489e+09 \n", + "T001 1.465277e+06 3.455470e+09 \n", + "T001 1.473337e+06 3.475361e+09 \n", + "T001 1.477958e+06 2.888083e+09 \n", "\n", - " MEAN(readings.signals.NUM_UNIQUE(readings.MONTH(timestamp))) \\\n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", + " MODE(readings.DAY(timestamp)) ... \\\n", + "turbine_id ... \n", + "T001 11 ... \n", + "T001 12 ... \n", + "T001 13 ... \n", + "T001 14 ... \n", + "T001 15 ... 
\n", "\n", - " MEAN(readings.signals.NUM_UNIQUE(readings.WEEKDAY(timestamp))) \\\n", + " SKEW(readings.signals.NUM_UNIQUE(readings.WEEKDAY(timestamp))) \\\n", "turbine_id \n", - "T001 2 \n", - "T001 2 \n", - "T001 2 \n", - "T001 2 \n", - "T001 2 \n", + "T001 0 \n", + "T001 0 \n", + "T001 0 \n", + "T001 0 \n", + "T001 0 \n", "\n", - " NUM_UNIQUE(readings.signals.MODE(readings.MONTH(timestamp))) \\\n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", + " SKEW(readings.signals.NUM_UNIQUE(readings.YEAR(timestamp))) \\\n", + "turbine_id \n", + "T001 0 \n", + "T001 0 \n", + "T001 0 \n", + "T001 0 \n", + "T001 0 \n", "\n", - " NUM_UNIQUE(readings.signals.MODE(readings.DAY(timestamp))) \\\n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", + " STD(readings.signals.NUM_UNIQUE(readings.DAY(timestamp))) \\\n", + "turbine_id \n", + "T001 0.0 \n", + "T001 0.0 \n", + "T001 0.0 \n", + "T001 0.0 \n", + "T001 0.0 \n", "\n", - " NUM_UNIQUE(readings.signals.MODE(readings.YEAR(timestamp))) \\\n", + " STD(readings.signals.NUM_UNIQUE(readings.MONTH(timestamp))) \\\n", "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", + "T001 0.0 \n", + "T001 0.0 \n", + "T001 0.0 \n", + "T001 0.0 \n", + "T001 0.0 \n", "\n", - " NUM_UNIQUE(readings.signals.MODE(readings.WEEKDAY(timestamp))) \\\n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", + " STD(readings.signals.NUM_UNIQUE(readings.WEEKDAY(timestamp))) \\\n", + "turbine_id \n", + "T001 0.0 \n", + "T001 0.0 \n", + "T001 0.0 \n", + "T001 0.0 \n", + "T001 0.0 \n", "\n", - " MODE(readings.signals.MODE(readings.MONTH(timestamp))) \\\n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", + " STD(readings.signals.NUM_UNIQUE(readings.YEAR(timestamp))) \\\n", + "turbine_id \n", + "T001 0.0 \n", + "T001 0.0 \n", + "T001 0.0 \n", + "T001 0.0 \n", + "T001 0.0 \n", "\n", - " MODE(readings.signals.MODE(readings.DAY(timestamp))) \\\n", - "turbine_id \n", - "T001 11 \n", - "T001 12 \n", - "T001 13 \n", - "T001 14 \n", - "T001 15 \n", + " SUM(readings.signals.NUM_UNIQUE(readings.DAY(timestamp))) \\\n", + "turbine_id \n", + "T001 7488 \n", + "T001 7488 \n", + "T001 7488 \n", + "T001 7488 \n", + "T001 7488 \n", "\n", - " MODE(readings.signals.MODE(readings.YEAR(timestamp))) \\\n", - "turbine_id \n", - "T001 2013 \n", - "T001 2013 \n", - "T001 2013 \n", - "T001 2013 \n", - "T001 2013 \n", + " SUM(readings.signals.NUM_UNIQUE(readings.MONTH(timestamp))) \\\n", + "turbine_id \n", + "T001 3744 \n", + "T001 3744 \n", + "T001 3744 \n", + "T001 3744 \n", + "T001 3744 \n", + "\n", + " SUM(readings.signals.NUM_UNIQUE(readings.WEEKDAY(timestamp))) \\\n", + "turbine_id \n", + "T001 7488 \n", + "T001 7488 \n", + "T001 7488 \n", + "T001 7488 \n", + "T001 7488 \n", "\n", - " MODE(readings.signals.MODE(readings.WEEKDAY(timestamp))) \n", - "turbine_id \n", - "T001 4 \n", - "T001 5 \n", - "T001 6 \n", - "T001 0 \n", - "T001 1 \n", + " SUM(readings.signals.NUM_UNIQUE(readings.YEAR(timestamp))) \n", + "turbine_id \n", + "T001 3744 \n", + "T001 3744 \n", + "T001 3744 \n", + "T001 3744 \n", + "T001 3744 \n", "\n", "[5 rows x 99 columns]" ] @@ -1235,25 +1235,25 @@ " \n", " \n", " \n", - " SUM(readings.value)\n", - " STD(readings.value)\n", + " COUNT(readings)\n", " MAX(readings.value)\n", - " SKEW(readings.value)\n", - " MIN(readings.value)\n", " MEAN(readings.value)\n", - " COUNT(readings)\n", + 
" MIN(readings.value)\n", " NUM_UNIQUE(readings.signal_id)\n", - " NUM_UNIQUE(readings.DAY(timestamp))\n", - " NUM_UNIQUE(readings.MONTH(timestamp))\n", + " SKEW(readings.value)\n", + " STD(readings.value)\n", + " SUM(readings.value)\n", + " MODE(readings.DAY(timestamp))\n", + " MODE(readings.MONTH(timestamp))\n", " ...\n", - " NUM_UNIQUE(readings.signals.MODE(readings.MONTH(timestamp)))\n", - " NUM_UNIQUE(readings.signals.MODE(readings.DAY(timestamp)))\n", - " NUM_UNIQUE(readings.signals.MODE(readings.YEAR(timestamp)))\n", - " NUM_UNIQUE(readings.signals.MODE(readings.WEEKDAY(timestamp)))\n", - " MODE(readings.signals.MODE(readings.MONTH(timestamp)))\n", - " MODE(readings.signals.MODE(readings.DAY(timestamp)))\n", - " MODE(readings.signals.MODE(readings.YEAR(timestamp)))\n", - " MODE(readings.signals.MODE(readings.WEEKDAY(timestamp)))\n", + " STD(readings.signals.NUM_UNIQUE(readings.DAY(timestamp)))\n", + " STD(readings.signals.NUM_UNIQUE(readings.MONTH(timestamp)))\n", + " STD(readings.signals.NUM_UNIQUE(readings.WEEKDAY(timestamp)))\n", + " STD(readings.signals.NUM_UNIQUE(readings.YEAR(timestamp)))\n", + " SUM(readings.signals.NUM_UNIQUE(readings.DAY(timestamp)))\n", + " SUM(readings.signals.NUM_UNIQUE(readings.MONTH(timestamp)))\n", + " SUM(readings.signals.NUM_UNIQUE(readings.WEEKDAY(timestamp)))\n", + " SUM(readings.signals.NUM_UNIQUE(readings.YEAR(timestamp)))\n", " MODE(readings.signal_id)=S01\n", " MODE(readings.signals.MODE(readings.turbine_id))=T001\n", " \n", @@ -1285,121 +1285,121 @@ " \n", " \n", " T001\n", - " 3.457475e+09\n", - " 1.456852e+06\n", + " 3744\n", " 3448719.0\n", - " 1.019212\n", + " 917107.079193\n", " 0.0\n", - " 917102.224456\n", - " 3770\n", " 26\n", - " 2\n", + " 1.019214\n", + " 1.456860e+06\n", + " 3.433649e+09\n", + " 11\n", " 1\n", " ...\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 11\n", - " 2013\n", - " 4\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 7488\n", + " 3744\n", + " 7488\n", + " 3744\n", " 1\n", " 1\n", " \n", " \n", " T001\n", - " 3.465358e+09\n", - " 1.459852e+06\n", + " 3744\n", " 3453777.0\n", - " 1.018760\n", + " 919201.162179\n", " 0.0\n", - " 919193.186021\n", - " 3770\n", " 26\n", - " 2\n", + " 1.018761\n", + " 1.459865e+06\n", + " 3.441489e+09\n", + " 12\n", " 1\n", " ...\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 12\n", - " 2013\n", - " 5\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 7488\n", + " 3744\n", + " 7488\n", + " 3744\n", " 1\n", " 1\n", " \n", " \n", " T001\n", - " 3.479406e+09\n", - " 1.465252e+06\n", + " 3744\n", " 3463880.0\n", - " 1.018192\n", + " 922935.352244\n", " 2.7\n", - " 922919.430027\n", - " 3770\n", " 26\n", - " 2\n", + " 1.018192\n", + " 1.465277e+06\n", + " 3.455470e+09\n", + " 13\n", " 1\n", " ...\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 13\n", - " 2013\n", - " 6\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 7488\n", + " 3744\n", + " 7488\n", + " 3744\n", " 1\n", " 1\n", " \n", " \n", " T001\n", - " 3.499427e+09\n", - " 1.473308e+06\n", + " 3744\n", " 3474703.0\n", - " 1.017664\n", + " 928248.092869\n", " -1.0\n", - " 928229.883899\n", - " 3770\n", " 26\n", - " 2\n", + " 1.017666\n", + " 1.473337e+06\n", + " 3.475361e+09\n", + " 14\n", " 1\n", " ...\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 14\n", - " 2013\n", - " 0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 7488\n", + " 3744\n", + " 7488\n", + " 3744\n", " 1\n", " 1\n", " \n", " \n", " T001\n", - " 2.912289e+09\n", - " 1.477955e+06\n", + " 3744\n", " 
3485019.0\n", - " 1.031879\n", + " 924186.531200\n", " 0.0\n", - " 924242.895144\n", - " 3770\n", " 26\n", - " 2\n", - " 1\n", - " ...\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", - " 1\n", + " 1.032002\n", + " 1.477958e+06\n", + " 2.888083e+09\n", " 15\n", - " 2013\n", " 1\n", + " ...\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 7488\n", + " 3744\n", + " 7488\n", + " 3744\n", " 1\n", " 1\n", " \n", @@ -1409,109 +1409,101 @@ "" ], "text/plain": [ - " SUM(readings.value) STD(readings.value) MAX(readings.value) \\\n", - "turbine_id \n", - "T001 3.457475e+09 1.456852e+06 3448719.0 \n", - "T001 3.465358e+09 1.459852e+06 3453777.0 \n", - "T001 3.479406e+09 1.465252e+06 3463880.0 \n", - "T001 3.499427e+09 1.473308e+06 3474703.0 \n", - "T001 2.912289e+09 1.477955e+06 3485019.0 \n", - "\n", - " SKEW(readings.value) MIN(readings.value) MEAN(readings.value) \\\n", - "turbine_id \n", - "T001 1.019212 0.0 917102.224456 \n", - "T001 1.018760 0.0 919193.186021 \n", - "T001 1.018192 2.7 922919.430027 \n", - "T001 1.017664 -1.0 928229.883899 \n", - "T001 1.031879 0.0 924242.895144 \n", - "\n", - " COUNT(readings) NUM_UNIQUE(readings.signal_id) \\\n", - "turbine_id \n", - "T001 3770 26 \n", - "T001 3770 26 \n", - "T001 3770 26 \n", - "T001 3770 26 \n", - "T001 3770 26 \n", + " COUNT(readings) MAX(readings.value) MEAN(readings.value) \\\n", + "turbine_id \n", + "T001 3744 3448719.0 917107.079193 \n", + "T001 3744 3453777.0 919201.162179 \n", + "T001 3744 3463880.0 922935.352244 \n", + "T001 3744 3474703.0 928248.092869 \n", + "T001 3744 3485019.0 924186.531200 \n", "\n", - " NUM_UNIQUE(readings.DAY(timestamp)) \\\n", - "turbine_id \n", - "T001 2 \n", - "T001 2 \n", - "T001 2 \n", - "T001 2 \n", - "T001 2 \n", + " MIN(readings.value) NUM_UNIQUE(readings.signal_id) \\\n", + "turbine_id \n", + "T001 0.0 26 \n", + "T001 0.0 26 \n", + "T001 2.7 26 \n", + "T001 -1.0 26 \n", + "T001 0.0 26 \n", "\n", - " NUM_UNIQUE(readings.MONTH(timestamp)) ... \\\n", - "turbine_id ... \n", - "T001 1 ... \n", - "T001 1 ... \n", - "T001 1 ... \n", - "T001 1 ... \n", - "T001 1 ... \n", + " SKEW(readings.value) STD(readings.value) SUM(readings.value) \\\n", + "turbine_id \n", + "T001 1.019214 1.456860e+06 3.433649e+09 \n", + "T001 1.018761 1.459865e+06 3.441489e+09 \n", + "T001 1.018192 1.465277e+06 3.455470e+09 \n", + "T001 1.017666 1.473337e+06 3.475361e+09 \n", + "T001 1.032002 1.477958e+06 2.888083e+09 \n", "\n", - " NUM_UNIQUE(readings.signals.MODE(readings.MONTH(timestamp))) \\\n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", + " MODE(readings.DAY(timestamp)) MODE(readings.MONTH(timestamp)) \\\n", + "turbine_id \n", + "T001 11 1 \n", + "T001 12 1 \n", + "T001 13 1 \n", + "T001 14 1 \n", + "T001 15 1 \n", "\n", - " NUM_UNIQUE(readings.signals.MODE(readings.DAY(timestamp))) \\\n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", + " ... STD(readings.signals.NUM_UNIQUE(readings.DAY(timestamp))) \\\n", + "turbine_id ... \n", + "T001 ... 0.0 \n", + "T001 ... 0.0 \n", + "T001 ... 0.0 \n", + "T001 ... 0.0 \n", + "T001 ... 
0.0 \n", "\n", - " NUM_UNIQUE(readings.signals.MODE(readings.YEAR(timestamp))) \\\n", + " STD(readings.signals.NUM_UNIQUE(readings.MONTH(timestamp))) \\\n", "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", + "T001 0.0 \n", + "T001 0.0 \n", + "T001 0.0 \n", + "T001 0.0 \n", + "T001 0.0 \n", "\n", - " NUM_UNIQUE(readings.signals.MODE(readings.WEEKDAY(timestamp))) \\\n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", + " STD(readings.signals.NUM_UNIQUE(readings.WEEKDAY(timestamp))) \\\n", + "turbine_id \n", + "T001 0.0 \n", + "T001 0.0 \n", + "T001 0.0 \n", + "T001 0.0 \n", + "T001 0.0 \n", "\n", - " MODE(readings.signals.MODE(readings.MONTH(timestamp))) \\\n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", + " STD(readings.signals.NUM_UNIQUE(readings.YEAR(timestamp))) \\\n", + "turbine_id \n", + "T001 0.0 \n", + "T001 0.0 \n", + "T001 0.0 \n", + "T001 0.0 \n", + "T001 0.0 \n", "\n", - " MODE(readings.signals.MODE(readings.DAY(timestamp))) \\\n", - "turbine_id \n", - "T001 11 \n", - "T001 12 \n", - "T001 13 \n", - "T001 14 \n", - "T001 15 \n", + " SUM(readings.signals.NUM_UNIQUE(readings.DAY(timestamp))) \\\n", + "turbine_id \n", + "T001 7488 \n", + "T001 7488 \n", + "T001 7488 \n", + "T001 7488 \n", + "T001 7488 \n", + "\n", + " SUM(readings.signals.NUM_UNIQUE(readings.MONTH(timestamp))) \\\n", + "turbine_id \n", + "T001 3744 \n", + "T001 3744 \n", + "T001 3744 \n", + "T001 3744 \n", + "T001 3744 \n", "\n", - " MODE(readings.signals.MODE(readings.YEAR(timestamp))) \\\n", - "turbine_id \n", - "T001 2013 \n", - "T001 2013 \n", - "T001 2013 \n", - "T001 2013 \n", - "T001 2013 \n", + " SUM(readings.signals.NUM_UNIQUE(readings.WEEKDAY(timestamp))) \\\n", + "turbine_id \n", + "T001 7488 \n", + "T001 7488 \n", + "T001 7488 \n", + "T001 7488 \n", + "T001 7488 \n", "\n", - " MODE(readings.signals.MODE(readings.WEEKDAY(timestamp))) \\\n", - "turbine_id \n", - "T001 4 \n", - "T001 5 \n", - "T001 6 \n", - "T001 0 \n", - "T001 1 \n", + " SUM(readings.signals.NUM_UNIQUE(readings.YEAR(timestamp))) \\\n", + "turbine_id \n", + "T001 3744 \n", + "T001 3744 \n", + "T001 3744 \n", + "T001 3744 \n", + "T001 3744 \n", "\n", " MODE(readings.signal_id)=S01 \\\n", "turbine_id \n", diff --git a/tutorials/pipelines/unstack_double_lstm_timeseries_classifier.ipynb b/tutorials/pipelines/unstack_double_lstm_timeseries_classifier.ipynb index 5c7b442..f44377b 100644 --- a/tutorials/pipelines/unstack_double_lstm_timeseries_classifier.ipynb +++ b/tutorials/pipelines/unstack_double_lstm_timeseries_classifier.ipynb @@ -11,15 +11,7 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using TensorFlow backend.\n" - ] - } - ], + "outputs": [], "source": [ "from greenguard.demo import load_demo\n", "\n", @@ -32,7 +24,7 @@ "metadata": {}, "outputs": [], "source": [ - "pipeline_name = 'unstack_double_lstm_timeseries_classifier'" + "pipeline_name = 'classes.unstack_double_lstm_timeseries_classifier'" ] }, { @@ -2458,19 +2450,7 @@ "cell_type": "code", "execution_count": 43, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:From /home/plamen/.virtualenvs/GreenGuard/lib/python3.6/site-packages/tensorflow_core/python/ops/resource_variable_ops.py:1630: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with 
constraint is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "If using Keras pass *_constraint arguments to layers.\n", - "WARNING:tensorflow:From /home/plamen/.virtualenvs/GreenGuard/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:422: The name tf.global_variables is deprecated. Please use tf.compat.v1.global_variables instead.\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "step = 10\n", "context = pipeline.fit(**context, output_=step, start_=step)" diff --git a/tutorials/pipelines/unstack_lstm_timeseries_classifier.ipynb b/tutorials/pipelines/unstack_lstm_timeseries_classifier.ipynb index faec108..ec68b0e 100644 --- a/tutorials/pipelines/unstack_lstm_timeseries_classifier.ipynb +++ b/tutorials/pipelines/unstack_lstm_timeseries_classifier.ipynb @@ -11,15 +11,7 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using TensorFlow backend.\n" - ] - } - ], + "outputs": [], "source": [ "from greenguard.demo import load_demo\n", "\n", @@ -32,7 +24,7 @@ "metadata": {}, "outputs": [], "source": [ - "pipeline_name = 'unstack_lstm_timeseries_classifier'" + "pipeline_name = 'classes.unstack_lstm_timeseries_classifier'" ] }, { @@ -2332,19 +2324,7 @@ "cell_type": "code", "execution_count": 43, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:From /home/plamen/.virtualenvs/GreenGuard/lib/python3.6/site-packages/tensorflow_core/python/ops/resource_variable_ops.py:1630: calling BaseResourceVariable.__init__ (from tensorflow.python.ops.resource_variable_ops) with constraint is deprecated and will be removed in a future version.\n", - "Instructions for updating:\n", - "If using Keras pass *_constraint arguments to layers.\n", - "WARNING:tensorflow:From /home/plamen/.virtualenvs/GreenGuard/lib/python3.6/site-packages/keras/backend/tensorflow_backend.py:422: The name tf.global_variables is deprecated. 
Please use tf.compat.v1.global_variables instead.\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "step = 10\n", "context = pipeline.fit(**context, output_=step, start_=step)" diff --git a/tutorials/pipelines/unstack_normalize_dfs_xgb_classifier.ipynb b/tutorials/pipelines/unstack_normalize_dfs_xgb_classifier.ipynb index 6af0092..8fc6c8b 100644 --- a/tutorials/pipelines/unstack_normalize_dfs_xgb_classifier.ipynb +++ b/tutorials/pipelines/unstack_normalize_dfs_xgb_classifier.ipynb @@ -11,15 +11,7 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using TensorFlow backend.\n" - ] - } - ], + "outputs": [], "source": [ "from greenguard.demo import load_demo\n", "\n", @@ -32,7 +24,7 @@ "metadata": {}, "outputs": [], "source": [ - "pipeline_name = 'unstack_normalize_dfs_xgb_classifier'" + "pipeline_name = 'classes.unstack_normalize_dfs_xgb_classifier'" ] }, { @@ -851,27 +843,27 @@ " \n", " \n", " \n", - " SUM(readings.value_S09)\n", - " SUM(readings.value_S01)\n", - " SUM(readings.value_S12)\n", - " SUM(readings.value_S10)\n", - " SUM(readings.value_S18)\n", - " SUM(readings.value_S03)\n", - " SUM(readings.value_S16)\n", - " SUM(readings.value_S11)\n", - " SUM(readings.value_S21)\n", - " SUM(readings.value_S08)\n", - " ...\n", - " MEAN(readings.value_S20)\n", " COUNT(readings)\n", - " NUM_UNIQUE(readings.WEEKDAY(timestamp))\n", - " NUM_UNIQUE(readings.DAY(timestamp))\n", - " NUM_UNIQUE(readings.YEAR(timestamp))\n", - " NUM_UNIQUE(readings.MONTH(timestamp))\n", - " MODE(readings.WEEKDAY(timestamp))\n", + " MAX(readings.value_S01)\n", + " MAX(readings.value_S02)\n", + " MAX(readings.value_S03)\n", + " MAX(readings.value_S04)\n", + " MAX(readings.value_S05)\n", + " MAX(readings.value_S06)\n", + " MAX(readings.value_S07)\n", + " MAX(readings.value_S08)\n", + " MAX(readings.value_S09)\n", + " ...\n", + " SUM(readings.value_S25)\n", + " SUM(readings.value_S26)\n", " MODE(readings.DAY(timestamp))\n", - " MODE(readings.YEAR(timestamp))\n", " MODE(readings.MONTH(timestamp))\n", + " MODE(readings.WEEKDAY(timestamp))\n", + " MODE(readings.YEAR(timestamp))\n", + " NUM_UNIQUE(readings.DAY(timestamp))\n", + " NUM_UNIQUE(readings.MONTH(timestamp))\n", + " NUM_UNIQUE(readings.WEEKDAY(timestamp))\n", + " NUM_UNIQUE(readings.YEAR(timestamp))\n", " \n", " \n", " turbine_id\n", @@ -901,122 +893,122 @@ " \n", " \n", " T001\n", - " 102204875.0\n", - " 19558.0\n", - " 483068250.0\n", - " 486911931.0\n", - " 463347422.0\n", - " 18602.0\n", - " 555.2\n", - " 499808026.0\n", - " 3090.0\n", - " 465058755.0\n", + " 144\n", + " 369.0\n", + " 376.0\n", + " 378.0\n", + " 401.0\n", + " 317.0\n", + " 324.0\n", + " 301.0\n", + " 3209069.0\n", + " 706654.0\n", " ...\n", - " 22.406897\n", - " 145\n", - " 2\n", - " 2\n", - " 1\n", + " 2743.0\n", + " 20569.0\n", + " 11\n", " 1\n", " 4\n", - " 11\n", " 2013\n", + " 2\n", + " 1\n", + " 2\n", " 1\n", " \n", " \n", " T001\n", - " 102808505.0\n", - " 37965.0\n", - " 483585662.0\n", - " 487487610.0\n", - " 467167621.0\n", - " 34495.0\n", - " 719.2\n", - " 500401347.0\n", - " 4970.0\n", - " 465669184.0\n", + " 144\n", + " 505.0\n", + " 426.0\n", + " 393.0\n", + " 517.0\n", + " 469.0\n", + " 407.0\n", + " 459.0\n", + " 3214181.0\n", + " 711718.0\n", " ...\n", - " 35.282759\n", - " 145\n", - " 2\n", - " 2\n", - " 1\n", + " 4237.0\n", + " 32991.0\n", + " 12\n", " 1\n", " 5\n", - " 12\n", " 2013\n", + " 2\n", + " 1\n", + " 2\n", " 1\n", " \n", " \n", " T001\n", - " 103701788.0\n", - " 73948.0\n", - 
" 484538080.0\n", - " 488531121.0\n", - " 473938223.0\n", - " 77804.0\n", - " 921.1\n", - " 501472849.0\n", - " 9902.0\n", - " 466675578.0\n", + " 144\n", + " 827.0\n", + " 794.0\n", + " 839.0\n", + " 848.0\n", + " 843.0\n", + " 843.0\n", + " 844.0\n", + " 3223315.0\n", + " 719405.0\n", " ...\n", - " 53.255172\n", - " 145\n", - " 2\n", - " 2\n", - " 1\n", + " 9008.0\n", + " 63463.0\n", + " 13\n", " 1\n", " 6\n", - " 13\n", " 2013\n", + " 2\n", + " 1\n", + " 2\n", " 1\n", " \n", " \n", " T001\n", - " 104917985.0\n", - " 87206.0\n", - " 486012792.0\n", - " 490024295.0\n", - " 483808936.0\n", - " 81629.0\n", - " 977.2\n", - " 502994331.0\n", - " 10720.0\n", - " 468099974.0\n", + " 144\n", + " 848.0\n", + " 841.0\n", + " 838.0\n", + " 849.0\n", + " 850.0\n", + " 848.0\n", + " 850.0\n", + " 3233989.0\n", + " 728250.0\n", " ...\n", - " 61.482759\n", - " 145\n", - " 2\n", - " 2\n", - " 1\n", + " 10073.0\n", + " 70393.0\n", + " 14\n", " 1\n", " 0\n", - " 14\n", " 2013\n", + " 2\n", + " 1\n", + " 2\n", " 1\n", " \n", " \n", " T001\n", - " 84328762.0\n", - " 61778.0\n", - " 389879083.0\n", - " 396521849.0\n", - " 492596536.0\n", - " 65122.0\n", - " 954.3\n", - " 403671026.0\n", - " 8684.0\n", - " 375635231.0\n", + " 144\n", + " 825.0\n", + " 840.0\n", + " 840.0\n", + " 844.0\n", + " 844.0\n", + " 830.0\n", + " 839.0\n", + " 3242820.0\n", + " 738155.0\n", " ...\n", - " 87.315789\n", - " 145\n", - " 2\n", - " 2\n", - " 1\n", + " 7381.0\n", + " 59954.0\n", + " 15\n", " 1\n", " 1\n", - " 15\n", " 2013\n", + " 2\n", + " 1\n", + " 2\n", " 1\n", " \n", " \n", @@ -1025,61 +1017,69 @@ "" ], "text/plain": [ - " SUM(readings.value_S09) SUM(readings.value_S01) \\\n", - "turbine_id \n", - "T001 102204875.0 19558.0 \n", - "T001 102808505.0 37965.0 \n", - "T001 103701788.0 73948.0 \n", - "T001 104917985.0 87206.0 \n", - "T001 84328762.0 61778.0 \n", + " COUNT(readings) MAX(readings.value_S01) MAX(readings.value_S02) \\\n", + "turbine_id \n", + "T001 144 369.0 376.0 \n", + "T001 144 505.0 426.0 \n", + "T001 144 827.0 794.0 \n", + "T001 144 848.0 841.0 \n", + "T001 144 825.0 840.0 \n", "\n", - " SUM(readings.value_S12) SUM(readings.value_S10) \\\n", + " MAX(readings.value_S03) MAX(readings.value_S04) \\\n", "turbine_id \n", - "T001 483068250.0 486911931.0 \n", - "T001 483585662.0 487487610.0 \n", - "T001 484538080.0 488531121.0 \n", - "T001 486012792.0 490024295.0 \n", - "T001 389879083.0 396521849.0 \n", + "T001 378.0 401.0 \n", + "T001 393.0 517.0 \n", + "T001 839.0 848.0 \n", + "T001 838.0 849.0 \n", + "T001 840.0 844.0 \n", "\n", - " SUM(readings.value_S18) SUM(readings.value_S03) \\\n", + " MAX(readings.value_S05) MAX(readings.value_S06) \\\n", "turbine_id \n", - "T001 463347422.0 18602.0 \n", - "T001 467167621.0 34495.0 \n", - "T001 473938223.0 77804.0 \n", - "T001 483808936.0 81629.0 \n", - "T001 492596536.0 65122.0 \n", + "T001 317.0 324.0 \n", + "T001 469.0 407.0 \n", + "T001 843.0 843.0 \n", + "T001 850.0 848.0 \n", + "T001 844.0 830.0 \n", "\n", - " SUM(readings.value_S16) SUM(readings.value_S11) \\\n", + " MAX(readings.value_S07) MAX(readings.value_S08) \\\n", "turbine_id \n", - "T001 555.2 499808026.0 \n", - "T001 719.2 500401347.0 \n", - "T001 921.1 501472849.0 \n", - "T001 977.2 502994331.0 \n", - "T001 954.3 403671026.0 \n", + "T001 301.0 3209069.0 \n", + "T001 459.0 3214181.0 \n", + "T001 844.0 3223315.0 \n", + "T001 850.0 3233989.0 \n", + "T001 839.0 3242820.0 \n", "\n", - " SUM(readings.value_S21) SUM(readings.value_S08) ... \\\n", - "turbine_id ... \n", - "T001 3090.0 465058755.0 ... 
\n", - "T001 4970.0 465669184.0 ... \n", - "T001 9902.0 466675578.0 ... \n", - "T001 10720.0 468099974.0 ... \n", - "T001 8684.0 375635231.0 ... \n", + " MAX(readings.value_S09) ... SUM(readings.value_S25) \\\n", + "turbine_id ... \n", + "T001 706654.0 ... 2743.0 \n", + "T001 711718.0 ... 4237.0 \n", + "T001 719405.0 ... 9008.0 \n", + "T001 728250.0 ... 10073.0 \n", + "T001 738155.0 ... 7381.0 \n", "\n", - " MEAN(readings.value_S20) COUNT(readings) \\\n", - "turbine_id \n", - "T001 22.406897 145 \n", - "T001 35.282759 145 \n", - "T001 53.255172 145 \n", - "T001 61.482759 145 \n", - "T001 87.315789 145 \n", + " SUM(readings.value_S26) MODE(readings.DAY(timestamp)) \\\n", + "turbine_id \n", + "T001 20569.0 11 \n", + "T001 32991.0 12 \n", + "T001 63463.0 13 \n", + "T001 70393.0 14 \n", + "T001 59954.0 15 \n", "\n", - " NUM_UNIQUE(readings.WEEKDAY(timestamp)) \\\n", - "turbine_id \n", - "T001 2 \n", - "T001 2 \n", - "T001 2 \n", - "T001 2 \n", - "T001 2 \n", + " MODE(readings.MONTH(timestamp)) \\\n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "\n", + " MODE(readings.WEEKDAY(timestamp)) MODE(readings.YEAR(timestamp)) \\\n", + "turbine_id \n", + "T001 4 2013 \n", + "T001 5 2013 \n", + "T001 6 2013 \n", + "T001 0 2013 \n", + "T001 1 2013 \n", "\n", " NUM_UNIQUE(readings.DAY(timestamp)) \\\n", "turbine_id \n", @@ -1089,14 +1089,6 @@ "T001 2 \n", "T001 2 \n", "\n", - " NUM_UNIQUE(readings.YEAR(timestamp)) \\\n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "\n", " NUM_UNIQUE(readings.MONTH(timestamp)) \\\n", "turbine_id \n", "T001 1 \n", @@ -1105,21 +1097,21 @@ "T001 1 \n", "T001 1 \n", "\n", - " MODE(readings.WEEKDAY(timestamp)) MODE(readings.DAY(timestamp)) \\\n", - "turbine_id \n", - "T001 4 11 \n", - "T001 5 12 \n", - "T001 6 13 \n", - "T001 0 14 \n", - "T001 1 15 \n", + " NUM_UNIQUE(readings.WEEKDAY(timestamp)) \\\n", + "turbine_id \n", + "T001 2 \n", + "T001 2 \n", + "T001 2 \n", + "T001 2 \n", + "T001 2 \n", "\n", - " MODE(readings.YEAR(timestamp)) MODE(readings.MONTH(timestamp)) \n", - "turbine_id \n", - "T001 2013 1 \n", - "T001 2013 1 \n", - "T001 2013 1 \n", - "T001 2013 1 \n", - "T001 2013 1 \n", + " NUM_UNIQUE(readings.YEAR(timestamp)) \n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", "\n", "[5 rows x 165 columns]" ] @@ -1221,27 +1213,27 @@ " \n", " \n", " \n", - " SUM(readings.value_S09)\n", - " SUM(readings.value_S01)\n", - " SUM(readings.value_S12)\n", - " SUM(readings.value_S10)\n", - " SUM(readings.value_S18)\n", - " SUM(readings.value_S03)\n", - " SUM(readings.value_S16)\n", - " SUM(readings.value_S11)\n", - " SUM(readings.value_S21)\n", - " SUM(readings.value_S08)\n", - " ...\n", - " MEAN(readings.value_S20)\n", " COUNT(readings)\n", - " NUM_UNIQUE(readings.WEEKDAY(timestamp))\n", - " NUM_UNIQUE(readings.DAY(timestamp))\n", - " NUM_UNIQUE(readings.YEAR(timestamp))\n", - " NUM_UNIQUE(readings.MONTH(timestamp))\n", - " MODE(readings.WEEKDAY(timestamp))\n", + " MAX(readings.value_S01)\n", + " MAX(readings.value_S02)\n", + " MAX(readings.value_S03)\n", + " MAX(readings.value_S04)\n", + " MAX(readings.value_S05)\n", + " MAX(readings.value_S06)\n", + " MAX(readings.value_S07)\n", + " MAX(readings.value_S08)\n", + " MAX(readings.value_S09)\n", + " ...\n", + " SUM(readings.value_S25)\n", + " SUM(readings.value_S26)\n", " MODE(readings.DAY(timestamp))\n", - " MODE(readings.YEAR(timestamp))\n", " MODE(readings.MONTH(timestamp))\n", + " 
MODE(readings.WEEKDAY(timestamp))\n", + " MODE(readings.YEAR(timestamp))\n", + " NUM_UNIQUE(readings.DAY(timestamp))\n", + " NUM_UNIQUE(readings.MONTH(timestamp))\n", + " NUM_UNIQUE(readings.WEEKDAY(timestamp))\n", + " NUM_UNIQUE(readings.YEAR(timestamp))\n", " \n", " \n", " turbine_id\n", @@ -1271,122 +1263,122 @@ " \n", " \n", " T001\n", - " 102204875.0\n", - " 19558.0\n", - " 483068250.0\n", - " 486911931.0\n", - " 463347422.0\n", - " 18602.0\n", - " 555.2\n", - " 499808026.0\n", - " 3090.0\n", - " 465058755.0\n", + " 144\n", + " 369.0\n", + " 376.0\n", + " 378.0\n", + " 401.0\n", + " 317.0\n", + " 324.0\n", + " 301.0\n", + " 3209069.0\n", + " 706654.0\n", " ...\n", - " 22.406897\n", - " 145\n", - " 2\n", - " 2\n", - " 1\n", + " 2743.0\n", + " 20569.0\n", + " 11\n", " 1\n", " 4\n", - " 11\n", " 2013\n", + " 2\n", + " 1\n", + " 2\n", " 1\n", " \n", " \n", " T001\n", - " 102808505.0\n", - " 37965.0\n", - " 483585662.0\n", - " 487487610.0\n", - " 467167621.0\n", - " 34495.0\n", - " 719.2\n", - " 500401347.0\n", - " 4970.0\n", - " 465669184.0\n", + " 144\n", + " 505.0\n", + " 426.0\n", + " 393.0\n", + " 517.0\n", + " 469.0\n", + " 407.0\n", + " 459.0\n", + " 3214181.0\n", + " 711718.0\n", " ...\n", - " 35.282759\n", - " 145\n", - " 2\n", - " 2\n", - " 1\n", + " 4237.0\n", + " 32991.0\n", + " 12\n", " 1\n", " 5\n", - " 12\n", " 2013\n", + " 2\n", + " 1\n", + " 2\n", " 1\n", " \n", " \n", " T001\n", - " 103701788.0\n", - " 73948.0\n", - " 484538080.0\n", - " 488531121.0\n", - " 473938223.0\n", - " 77804.0\n", - " 921.1\n", - " 501472849.0\n", - " 9902.0\n", - " 466675578.0\n", + " 144\n", + " 827.0\n", + " 794.0\n", + " 839.0\n", + " 848.0\n", + " 843.0\n", + " 843.0\n", + " 844.0\n", + " 3223315.0\n", + " 719405.0\n", " ...\n", - " 53.255172\n", - " 145\n", - " 2\n", - " 2\n", - " 1\n", + " 9008.0\n", + " 63463.0\n", + " 13\n", " 1\n", " 6\n", - " 13\n", " 2013\n", + " 2\n", + " 1\n", + " 2\n", " 1\n", " \n", " \n", " T001\n", - " 104917985.0\n", - " 87206.0\n", - " 486012792.0\n", - " 490024295.0\n", - " 483808936.0\n", - " 81629.0\n", - " 977.2\n", - " 502994331.0\n", - " 10720.0\n", - " 468099974.0\n", + " 144\n", + " 848.0\n", + " 841.0\n", + " 838.0\n", + " 849.0\n", + " 850.0\n", + " 848.0\n", + " 850.0\n", + " 3233989.0\n", + " 728250.0\n", " ...\n", - " 61.482759\n", - " 145\n", - " 2\n", - " 2\n", - " 1\n", + " 10073.0\n", + " 70393.0\n", + " 14\n", " 1\n", " 0\n", - " 14\n", " 2013\n", + " 2\n", + " 1\n", + " 2\n", " 1\n", " \n", " \n", " T001\n", - " 84328762.0\n", - " 61778.0\n", - " 389879083.0\n", - " 396521849.0\n", - " 492596536.0\n", - " 65122.0\n", - " 954.3\n", - " 403671026.0\n", - " 8684.0\n", - " 375635231.0\n", + " 144\n", + " 825.0\n", + " 840.0\n", + " 840.0\n", + " 844.0\n", + " 844.0\n", + " 830.0\n", + " 839.0\n", + " 3242820.0\n", + " 738155.0\n", " ...\n", - " 87.315789\n", - " 145\n", - " 2\n", - " 2\n", - " 1\n", + " 7381.0\n", + " 59954.0\n", + " 15\n", " 1\n", " 1\n", - " 15\n", " 2013\n", + " 2\n", + " 1\n", + " 2\n", " 1\n", " \n", " \n", @@ -1395,61 +1387,69 @@ "" ], "text/plain": [ - " SUM(readings.value_S09) SUM(readings.value_S01) \\\n", - "turbine_id \n", - "T001 102204875.0 19558.0 \n", - "T001 102808505.0 37965.0 \n", - "T001 103701788.0 73948.0 \n", - "T001 104917985.0 87206.0 \n", - "T001 84328762.0 61778.0 \n", + " COUNT(readings) MAX(readings.value_S01) MAX(readings.value_S02) \\\n", + "turbine_id \n", + "T001 144 369.0 376.0 \n", + "T001 144 505.0 426.0 \n", + "T001 144 827.0 794.0 \n", + "T001 144 848.0 841.0 \n", + "T001 144 825.0 840.0 \n", 
"\n", - " SUM(readings.value_S12) SUM(readings.value_S10) \\\n", + " MAX(readings.value_S03) MAX(readings.value_S04) \\\n", "turbine_id \n", - "T001 483068250.0 486911931.0 \n", - "T001 483585662.0 487487610.0 \n", - "T001 484538080.0 488531121.0 \n", - "T001 486012792.0 490024295.0 \n", - "T001 389879083.0 396521849.0 \n", + "T001 378.0 401.0 \n", + "T001 393.0 517.0 \n", + "T001 839.0 848.0 \n", + "T001 838.0 849.0 \n", + "T001 840.0 844.0 \n", "\n", - " SUM(readings.value_S18) SUM(readings.value_S03) \\\n", + " MAX(readings.value_S05) MAX(readings.value_S06) \\\n", "turbine_id \n", - "T001 463347422.0 18602.0 \n", - "T001 467167621.0 34495.0 \n", - "T001 473938223.0 77804.0 \n", - "T001 483808936.0 81629.0 \n", - "T001 492596536.0 65122.0 \n", + "T001 317.0 324.0 \n", + "T001 469.0 407.0 \n", + "T001 843.0 843.0 \n", + "T001 850.0 848.0 \n", + "T001 844.0 830.0 \n", "\n", - " SUM(readings.value_S16) SUM(readings.value_S11) \\\n", + " MAX(readings.value_S07) MAX(readings.value_S08) \\\n", "turbine_id \n", - "T001 555.2 499808026.0 \n", - "T001 719.2 500401347.0 \n", - "T001 921.1 501472849.0 \n", - "T001 977.2 502994331.0 \n", - "T001 954.3 403671026.0 \n", + "T001 301.0 3209069.0 \n", + "T001 459.0 3214181.0 \n", + "T001 844.0 3223315.0 \n", + "T001 850.0 3233989.0 \n", + "T001 839.0 3242820.0 \n", "\n", - " SUM(readings.value_S21) SUM(readings.value_S08) ... \\\n", - "turbine_id ... \n", - "T001 3090.0 465058755.0 ... \n", - "T001 4970.0 465669184.0 ... \n", - "T001 9902.0 466675578.0 ... \n", - "T001 10720.0 468099974.0 ... \n", - "T001 8684.0 375635231.0 ... \n", + " MAX(readings.value_S09) ... SUM(readings.value_S25) \\\n", + "turbine_id ... \n", + "T001 706654.0 ... 2743.0 \n", + "T001 711718.0 ... 4237.0 \n", + "T001 719405.0 ... 9008.0 \n", + "T001 728250.0 ... 10073.0 \n", + "T001 738155.0 ... 
7381.0 \n", "\n", - " MEAN(readings.value_S20) COUNT(readings) \\\n", - "turbine_id \n", - "T001 22.406897 145 \n", - "T001 35.282759 145 \n", - "T001 53.255172 145 \n", - "T001 61.482759 145 \n", - "T001 87.315789 145 \n", + " SUM(readings.value_S26) MODE(readings.DAY(timestamp)) \\\n", + "turbine_id \n", + "T001 20569.0 11 \n", + "T001 32991.0 12 \n", + "T001 63463.0 13 \n", + "T001 70393.0 14 \n", + "T001 59954.0 15 \n", "\n", - " NUM_UNIQUE(readings.WEEKDAY(timestamp)) \\\n", - "turbine_id \n", - "T001 2 \n", - "T001 2 \n", - "T001 2 \n", - "T001 2 \n", - "T001 2 \n", + " MODE(readings.MONTH(timestamp)) \\\n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "\n", + " MODE(readings.WEEKDAY(timestamp)) MODE(readings.YEAR(timestamp)) \\\n", + "turbine_id \n", + "T001 4 2013 \n", + "T001 5 2013 \n", + "T001 6 2013 \n", + "T001 0 2013 \n", + "T001 1 2013 \n", "\n", " NUM_UNIQUE(readings.DAY(timestamp)) \\\n", "turbine_id \n", @@ -1459,14 +1459,6 @@ "T001 2 \n", "T001 2 \n", "\n", - " NUM_UNIQUE(readings.YEAR(timestamp)) \\\n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "\n", " NUM_UNIQUE(readings.MONTH(timestamp)) \\\n", "turbine_id \n", "T001 1 \n", @@ -1475,21 +1467,21 @@ "T001 1 \n", "T001 1 \n", "\n", - " MODE(readings.WEEKDAY(timestamp)) MODE(readings.DAY(timestamp)) \\\n", - "turbine_id \n", - "T001 4 11 \n", - "T001 5 12 \n", - "T001 6 13 \n", - "T001 0 14 \n", - "T001 1 15 \n", + " NUM_UNIQUE(readings.WEEKDAY(timestamp)) \\\n", + "turbine_id \n", + "T001 2 \n", + "T001 2 \n", + "T001 2 \n", + "T001 2 \n", + "T001 2 \n", "\n", - " MODE(readings.YEAR(timestamp)) MODE(readings.MONTH(timestamp)) \n", - "turbine_id \n", - "T001 2013 1 \n", - "T001 2013 1 \n", - "T001 2013 1 \n", - "T001 2013 1 \n", - "T001 2013 1 \n", + " NUM_UNIQUE(readings.YEAR(timestamp)) \n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", "\n", "[5 rows x 165 columns]" ] From 23216be0272572d8d2567e894e11e3466d10b23d Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Fri, 22 Jan 2021 15:33:29 +0100 Subject: [PATCH 132/171] Add release notes for 0.3.0 --- HISTORY.md | 19 +++++++++++++++++-- setup.py | 1 + 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index ef6042e..e656d1a 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,20 @@ # History +## 0.3.0 - 2021-01-22 + +This release increases the supported version of python to `3.8` and also includes changes +in the installation requirements, where ``pandas`` and ``scikit-optimize`` packages have +been updated to support higher versions. This changes come together with the newer versions +of ``MLBlocks`` and ``MLPrimitives``. + +### Internal Improvements + +* Fix ``run_benchmark`` generating properly the ``init_hyperparameters`` for the pipelines. +* New ``FPR`` metric. +* New ``roc_auc_score`` metric. +* Multiple benchmarking metrics allowed. +* Multiple ``tpr`` or ``threshold`` values allowed for the benchmark. + ## 0.2.6 - 2020-10-23 * Fix ``mkdir`` when exporting to ``csv`` file the benchmark results. @@ -16,7 +31,7 @@ With this release we include: * `run_benchmark`: A function within the module `benchmark` that allows the user to evaluate templates against problems with different window size and resample rules. * `summarize_results`: A function that given a `csv` file generates a `xlsx` file with a summary -tab and a deatailed tab with the results from `run_benchmark`. 
+tab and a detailed tab with the results from `run_benchmark`. ## 0.2.4 - 2020-09-25 @@ -28,7 +43,7 @@ tab and a deatailed tab with the results from `run_benchmark`. ## 0.2.2 - 2020-07-10 -### Internal Imrpovements +### Internal Improvements * Added github actions. diff --git a/setup.py b/setup.py index cefe9da..ef8df55 100644 --- a/setup.py +++ b/setup.py @@ -89,6 +89,7 @@ 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', ], description='AutoML for Renewable Energy Industries.', entry_points={ From abcf8bd5f8a071eab1665770b2e2f866301e2e71 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Fri, 22 Jan 2021 15:33:42 +0100 Subject: [PATCH 133/171] =?UTF-8?q?Bump=20version:=200.2.7.dev2=20?= =?UTF-8?q?=E2=86=92=201.0.0.dev0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- greenguard/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/greenguard/__init__.py b/greenguard/__init__.py index 4374a6d..3a19035 100644 --- a/greenguard/__init__.py +++ b/greenguard/__init__.py @@ -4,7 +4,7 @@ __author__ = """MIT Data To AI Lab""" __email__ = 'dailabmit@gmail.com' -__version__ = '0.2.7.dev2' +__version__ = '1.0.0.dev0' import os diff --git a/setup.cfg b/setup.cfg index 724b591..564d55c 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.2.7.dev2 +current_version = 1.0.0.dev0 commit = True tag = True parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? diff --git a/setup.py b/setup.py index ef8df55..4f00063 100644 --- a/setup.py +++ b/setup.py @@ -115,6 +115,6 @@ test_suite='tests', tests_require=tests_require, url='https://github.com/D3-AI/GreenGuard', - version='0.2.7.dev2', + version='1.0.0.dev0', zip_safe=False, ) From ccf15189b5224e8fbadd1542f29f0b883f186c4d Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Fri, 22 Jan 2021 15:33:43 +0100 Subject: [PATCH 134/171] =?UTF-8?q?Bump=20version:=201.0.0.dev0=20?= =?UTF-8?q?=E2=86=92=201.0.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- greenguard/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/greenguard/__init__.py b/greenguard/__init__.py index 3a19035..a579d1a 100644 --- a/greenguard/__init__.py +++ b/greenguard/__init__.py @@ -4,7 +4,7 @@ __author__ = """MIT Data To AI Lab""" __email__ = 'dailabmit@gmail.com' -__version__ = '1.0.0.dev0' +__version__ = '1.0.0' import os diff --git a/setup.cfg b/setup.cfg index 564d55c..5979b04 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.0.0.dev0 +current_version = 1.0.0 commit = True tag = True parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? 
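Note on the ``parse`` lines in the ``setup.cfg`` hunks of the version-bump patches above and below: bumpversion splits the version string into named groups so that both ``X.Y.Z`` releases and ``X.Y.Z.devN`` candidates can be handled. A minimal sketch of how such a pattern decomposes the versions seen in this series follows; the group names (``major``, ``minor``, ``patch``, ``release``, ``candidate``) are assumed from bumpversion's usual conventions rather than confirmed by these hunks.

```python
# Sketch only: decompose a version string the way the bumpversion
# ``parse`` expression in setup.cfg does. The group names are assumed
# (standard bumpversion convention), not taken from the hunks above.
import re

PARSE = re.compile(
    r'(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)'
    r'(\.(?P<release>[a-z]+)(?P<candidate>\d+))?'
)

print(PARSE.match('1.0.0.dev0').groupdict())
# {'major': '1', 'minor': '0', 'patch': '0', 'release': 'dev', 'candidate': '0'}
print(PARSE.match('1.0.0').groupdict()['release'])  # None for a final release
```
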
diff --git a/setup.py b/setup.py index 4f00063..68d76d5 100644 --- a/setup.py +++ b/setup.py @@ -115,6 +115,6 @@ test_suite='tests', tests_require=tests_require, url='https://github.com/D3-AI/GreenGuard', - version='1.0.0.dev0', + version='1.0.0', zip_safe=False, ) From a9c2c38105ea18f7591fce491bf4090e86b25f60 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Fri, 22 Jan 2021 15:53:31 +0100 Subject: [PATCH 135/171] =?UTF-8?q?Bump=20version:=201.0.0=20=E2=86=92=201?= =?UTF-8?q?.0.1.dev0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- greenguard/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/greenguard/__init__.py b/greenguard/__init__.py index a579d1a..869ef83 100644 --- a/greenguard/__init__.py +++ b/greenguard/__init__.py @@ -4,7 +4,7 @@ __author__ = """MIT Data To AI Lab""" __email__ = 'dailabmit@gmail.com' -__version__ = '1.0.0' +__version__ = '1.0.1.dev0' import os diff --git a/setup.cfg b/setup.cfg index 5979b04..f92e999 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.0.0 +current_version = 1.0.1.dev0 commit = True tag = True parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? diff --git a/setup.py b/setup.py index 68d76d5..ad79245 100644 --- a/setup.py +++ b/setup.py @@ -115,6 +115,6 @@ test_suite='tests', tests_require=tests_require, url='https://github.com/D3-AI/GreenGuard', - version='1.0.0', + version='1.0.1.dev0', zip_safe=False, ) From c730e1e1d6cbca04a355e6ae08fba045e5cd0a19 Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Fri, 22 Jan 2021 18:07:48 +0100 Subject: [PATCH 136/171] Revert 1.0.0 error release --- greenguard/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/greenguard/__init__.py b/greenguard/__init__.py index 869ef83..2a70d1f 100644 --- a/greenguard/__init__.py +++ b/greenguard/__init__.py @@ -4,7 +4,7 @@ __author__ = """MIT Data To AI Lab""" __email__ = 'dailabmit@gmail.com' -__version__ = '1.0.1.dev0' +__version__ = '0.3.0.dev0' import os diff --git a/setup.cfg b/setup.cfg index f92e999..e2b3ca8 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 1.0.1.dev0 +current_version = 0.3.0.dev0 commit = True tag = True parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? diff --git a/setup.py b/setup.py index ad79245..1ade9c6 100644 --- a/setup.py +++ b/setup.py @@ -115,6 +115,6 @@ test_suite='tests', tests_require=tests_require, url='https://github.com/D3-AI/GreenGuard', - version='1.0.1.dev0', + version='0.3.0.dev0', zip_safe=False, ) From 027a44abe44fc069fd0549574cb534610293fe62 Mon Sep 17 00:00:00 2001 From: Carles Sala Date: Fri, 22 Jan 2021 18:09:58 +0100 Subject: [PATCH 137/171] Prevent making a release before making a release candidate --- Makefile | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 2b2d2f8..9a69d1a 100644 --- a/Makefile +++ b/Makefile @@ -224,6 +224,7 @@ bumpversion-revert: ## Undo a previous bumpversion-release CLEAN_DIR := $(shell git status --short | grep -v ??) 
 CURRENT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null)
+CURRENT_VERSION := $(shell grep "^current_version" setup.cfg | grep -o "dev[0-9]*")
 CHANGELOG_LINES := $(shell git diff HEAD..origin/stable HISTORY.md 2>&1 | wc -l)
 
 .PHONY: check-clean
@@ -238,6 +239,12 @@ ifneq ($(CURRENT_BRANCH),master)
 	$(error Please make the release from master branch\n)
 endif
 
+.PHONY: check-candidate
+check-candidate: ## Check if a release candidate has been made
+ifeq ($(CURRENT_VERSION),dev0)
+	$(error Please make a release candidate and test it before attempting a release)
+endif
+
 .PHONY: check-history
 check-history: ## Check if HISTORY.md has been modified
 ifeq ($(CHANGELOG_LINES),0)
@@ -245,7 +252,7 @@ ifeq ($(CHANGELOG_LINES),0)
 endif
 
 .PHONY: check-release
-check-release: check-clean check-master check-history ## Check if the release can be made
+check-release: check-candidate check-clean check-master check-history ## Check if the release can be made
 	@echo "A new release can be made"
 
 .PHONY: release
@@ -260,12 +267,6 @@ release-candidate: check-master publish bumpversion-candidate
 
 .PHONY: release-candidate-test
 release-candidate-test: check-clean check-master publish-test
 
-.PHONY: release-minor
-release-minor: check-release bumpversion-minor release
-
-.PHONY: release-major
-release-major: check-release bumpversion-major release
-
 
 # DOCKER TARGETS
 

From 9b57da2d0e133a1a1e05faf2aaee4d6048fcefbb Mon Sep 17 00:00:00 2001
From: Plamen Valentinov Kolev
Date: Fri, 22 Jan 2021 18:26:04 +0100
Subject: [PATCH 138/171] =?UTF-8?q?Bump=20version:=200.3.0.dev0=20?=
 =?UTF-8?q?=E2=86=92=200.3.0.dev1?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 greenguard/__init__.py | 2 +-
 setup.cfg              | 2 +-
 setup.py               | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/greenguard/__init__.py b/greenguard/__init__.py
index 2a70d1f..f340385 100644
--- a/greenguard/__init__.py
+++ b/greenguard/__init__.py
@@ -4,7 +4,7 @@
 __author__ = """MIT Data To AI Lab"""
 __email__ = 'dailabmit@gmail.com'
 
-__version__ = '0.3.0.dev0'
+__version__ = '0.3.0.dev1'
 
 import os
 
diff --git a/setup.cfg b/setup.cfg
index e2b3ca8..c1622f5 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.3.0.dev0
+current_version = 0.3.0.dev1
 commit = True
 tag = True
 parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))?
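The ``check-candidate`` target added in the Makefile patch above makes ``check-release`` fail while ``current_version`` still ends in ``dev0``, i.e. before any release candidate has been published and tested. A rough Python equivalent of the same guard, assuming as in this repository that ``setup.cfg`` holds the ``[bumpversion]`` section, would be:

```python
# Rough sketch of the ``check-candidate`` guard in Python (assumes a
# setup.cfg with a [bumpversion] section, as in this repository).
import configparser

def check_candidate(setup_cfg='setup.cfg'):
    config = configparser.ConfigParser()
    config.read(setup_cfg)
    version = config['bumpversion']['current_version']
    # Mirrors `ifeq ($(CURRENT_VERSION),dev0)` in the Makefile target.
    if version.endswith('dev0'):
        raise RuntimeError(
            'Please make a release candidate and test it '
            'before attempting a release'
        )
```
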
diff --git a/setup.py b/setup.py index 1ade9c6..f24b19e 100644 --- a/setup.py +++ b/setup.py @@ -115,6 +115,6 @@ test_suite='tests', tests_require=tests_require, url='https://github.com/D3-AI/GreenGuard', - version='0.3.0.dev0', + version='0.3.0.dev1', zip_safe=False, ) From 32bbc48d68c6573990fec5d9fa164d0df9e1e0d5 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Fri, 22 Jan 2021 19:34:39 +0100 Subject: [PATCH 139/171] =?UTF-8?q?Bump=20version:=200.3.0.dev1=20?= =?UTF-8?q?=E2=86=92=200.3.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- greenguard/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/greenguard/__init__.py b/greenguard/__init__.py index f340385..dc3e138 100644 --- a/greenguard/__init__.py +++ b/greenguard/__init__.py @@ -4,7 +4,7 @@ __author__ = """MIT Data To AI Lab""" __email__ = 'dailabmit@gmail.com' -__version__ = '0.3.0.dev1' +__version__ = '0.3.0' import os diff --git a/setup.cfg b/setup.cfg index c1622f5..32f7445 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.3.0.dev1 +current_version = 0.3.0 commit = True tag = True parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? diff --git a/setup.py b/setup.py index f24b19e..ad985e3 100644 --- a/setup.py +++ b/setup.py @@ -115,6 +115,6 @@ test_suite='tests', tests_require=tests_require, url='https://github.com/D3-AI/GreenGuard', - version='0.3.0.dev1', + version='0.3.0', zip_safe=False, ) From d8597f8adf9b579e28a650312616c4373f611cc8 Mon Sep 17 00:00:00 2001 From: Plamen Valentinov Kolev Date: Fri, 22 Jan 2021 20:06:49 +0100 Subject: [PATCH 140/171] =?UTF-8?q?Bump=20version:=200.3.0=20=E2=86=92=200?= =?UTF-8?q?.3.1.dev0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- greenguard/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/greenguard/__init__.py b/greenguard/__init__.py index dc3e138..4d310c6 100644 --- a/greenguard/__init__.py +++ b/greenguard/__init__.py @@ -4,7 +4,7 @@ __author__ = """MIT Data To AI Lab""" __email__ = 'dailabmit@gmail.com' -__version__ = '0.3.0' +__version__ = '0.3.1.dev0' import os diff --git a/setup.cfg b/setup.cfg index 32f7445..ee6b598 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.3.0 +current_version = 0.3.1.dev0 commit = True tag = True parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? 
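The version bumps in patches 138-140 walk one full release cycle: the candidate ``0.3.0.dev1`` is published and tested, the ``.devN`` suffix is dropped for the final ``0.3.0`` release, and the next development iteration opens as ``0.3.1.dev0``. A small illustrative sketch of that sequence (not taken from the repository):

```python
# Illustrative sketch of the version sequence in patches 138-140:
# X.Y.Z.devN (candidate) -> X.Y.Z (release) -> X.Y.(Z+1).dev0 (next dev).
def release_cycle(candidate):
    base, _, _ = candidate.partition('.dev')      # '0.3.0.dev1' -> '0.3.0'
    major, minor, patch = map(int, base.split('.'))
    next_dev = '{}.{}.{}.dev0'.format(major, minor, patch + 1)
    return base, next_dev

assert release_cycle('0.3.0.dev1') == ('0.3.0', '0.3.1.dev0')
```
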
diff --git a/setup.py b/setup.py index ad985e3..bed1432 100644 --- a/setup.py +++ b/setup.py @@ -115,6 +115,6 @@ test_suite='tests', tests_require=tests_require, url='https://github.com/D3-AI/GreenGuard', - version='0.3.0', + version='0.3.1.dev0', zip_safe=False, ) From 2c3588f4b6afa70c81ecda21c8ea3ede1b53ac3f Mon Sep 17 00:00:00 2001 From: Sarah Alnegheimish <40212131+sarahmish@users.noreply.github.com> Date: Thu, 16 Sep 2021 16:07:10 -0400 Subject: [PATCH 141/171] Update dependencies (#58) * cap keras version * increase * let 2.5 be the cap --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index bed1432..d457c01 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ 'dask>=2.6.0,<3', 'distributed>=2.6.0,<3', 'h5py<2.11.0,>=2.10.0', # fix tensorflow requirement - 'Keras>=2.4', + 'Keras>=2.4,<2.5', 'tabulate>=0.8.3,<0.9', 'xlsxwriter>=1.3.6<1.4', ] From 93899cfc1363005281f790685bfce257db259fee Mon Sep 17 00:00:00 2001 From: Sarah Alnegheimish <40212131+sarahmish@users.noreply.github.com> Date: Mon, 15 Nov 2021 09:12:03 -0500 Subject: [PATCH 142/171] Update dependencies (#60) * update mlprimitives --- setup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.py b/setup.py index d457c01..79eeb5b 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ install_requires = [ 'baytune>=0.4.0,<0.5', - 'mlprimitives>=0.3.0,<0.4', + 'mlprimitives>=0.3.2,<0.4', 'mlblocks>=0.4.0,<0.5', 'pymongo>=3.7.2,<4', 'scikit-learn>=0.21', @@ -31,7 +31,6 @@ 'dask>=2.6.0,<3', 'distributed>=2.6.0,<3', 'h5py<2.11.0,>=2.10.0', # fix tensorflow requirement - 'Keras>=2.4,<2.5', 'tabulate>=0.8.3,<0.9', 'xlsxwriter>=1.3.6<1.4', ] From 059e78db2b3eaba9f24289f771751c0f5f2a0868 Mon Sep 17 00:00:00 2001 From: Sarah Alnegheimish <40212131+sarahmish@users.noreply.github.com> Date: Mon, 13 Dec 2021 08:26:33 -0500 Subject: [PATCH 143/171] Change GreenGuard to Draco (#61) * change greenguard to draco * update badge to gh workflow * fix logo * revert docker settings * change version --- .github/ISSUE_TEMPLATE.md | 2 +- .gitignore | 2 +- CONTRIBUTING.rst | 32 ++-- DATABASE.md | 2 +- DATA_FORMAT.md | 12 +- MANIFEST.in | 2 +- Makefile | 30 ++-- README.md | 67 ++++---- docs/Makefile | 2 +- docs/advanced_usage/concepts.md | 8 +- docs/advanced_usage/csv.md | 8 +- docs/advanced_usage/docker.md | 44 ++--- docs/conf.py | 18 +- .../{GreenGuard-200.png => Draco-200.png} | Bin docs/images/{GreenGuard.ico => Draco.ico} | Bin docs/images/{GreenGuard.png => Draco.png} | Bin docs/index.rst | 4 +- docs/make.bat | 2 +- {greenguard => draco}/__init__.py | 8 +- {greenguard => draco}/benchmark.py | 29 ++-- {greenguard => draco}/db.py | 2 +- {greenguard => draco}/demo.py | 4 +- draco/loaders/__init__.py | 5 + {greenguard => draco}/loaders/csv.py | 2 +- {greenguard => draco}/metrics.py | 0 {greenguard => draco}/pipeline.py | 18 +- .../classes/normalize_dfs_xgb_classifier.json | 0 .../classes/unstack_dfs_xgb_classifier.json | 0 ...ack_double_lstm_timeseries_classifier.json | 0 .../unstack_lstm_timeseries_classifier.json | 0 .../unstack_normalize_dfs_xgb_classifier.json | 0 .../disabled/dfs_xgb_classifier.json | 0 .../normalize_dfs_xgb_classifier.json | 0 .../disabled/resample_dfs_xgb_classifier.json | 0 ...resample_normalize_dfs_xgb_classifier.json | 0 .../resample_unstack_dfs_xgb_classifier.json | 0 ...ack_double_lstm_timeseries_classifier.json | 0 ...le_unstack_lstm_timeseries_classifier.json | 0 ..._unstack_normalize_dfs_xgb_classifier.json | 0 
.../normalize_dfs_xgb_classifier.json | 0 .../unstack_dfs_xgb_classifier.json | 0 ...ack_double_lstm_timeseries_classifier.json | 0 .../unstack_lstm_timeseries_classifier.json | 0 .../unstack_normalize_dfs_xgb_classifier.json | 0 .../unstacked_dfs_xgb_classifier.json | 0 ...ked_double_lstm_timeseries_classifier.json | 0 .../unstacked_lstm_timeseries_classifier.json | 0 ...nstacked_normalize_dfs_xgb_classifier.json | 0 .../primitives/numpy.take.json | 0 .../xgboost.XGBClassifier:probabilities.json | 0 {greenguard => draco}/results.py | 0 {greenguard => draco}/targets.py | 0 {greenguard => draco}/utils.py | 0 greenguard/loaders/__init__.py | 5 - setup.cfg | 4 +- setup.py | 15 +- tests/test_benchmark.py | 6 +- tests/test_metrics.py | 2 +- tests/test_pipeline.py | 18 +- ....ipynb => 01_Draco_Machine_Learning.ipynb} | 72 ++++---- tutorials/02_Extract_Readings.ipynb | 156 +++++++++--------- tutorials/03_Benchmarking.ipynb | 38 ++--- .../normalize_dfs_xgb_classifier.ipynb | 8 +- ...ck_double_lstm_timeseries_classifier.ipynb | 8 +- .../unstack_lstm_timeseries_classifier.ipynb | 8 +- ...unstack_normalize_dfs_xgb_classifier.ipynb | 8 +- 66 files changed, 323 insertions(+), 328 deletions(-) rename docs/images/{GreenGuard-200.png => Draco-200.png} (100%) rename docs/images/{GreenGuard.ico => Draco.ico} (100%) rename docs/images/{GreenGuard.png => Draco.png} (100%) rename {greenguard => draco}/__init__.py (67%) rename {greenguard => draco}/benchmark.py (97%) rename {greenguard => draco}/db.py (97%) rename {greenguard => draco}/demo.py (95%) create mode 100644 draco/loaders/__init__.py rename {greenguard => draco}/loaders/csv.py (99%) rename {greenguard => draco}/metrics.py (100%) rename {greenguard => draco}/pipeline.py (97%) rename {greenguard => draco}/pipelines/classes/normalize_dfs_xgb_classifier.json (100%) rename {greenguard => draco}/pipelines/classes/unstack_dfs_xgb_classifier.json (100%) rename {greenguard => draco}/pipelines/classes/unstack_double_lstm_timeseries_classifier.json (100%) rename {greenguard => draco}/pipelines/classes/unstack_lstm_timeseries_classifier.json (100%) rename {greenguard => draco}/pipelines/classes/unstack_normalize_dfs_xgb_classifier.json (100%) rename {greenguard => draco}/pipelines/disabled/dfs_xgb_classifier.json (100%) rename {greenguard => draco}/pipelines/disabled/normalize_dfs_xgb_classifier.json (100%) rename {greenguard => draco}/pipelines/disabled/resample_dfs_xgb_classifier.json (100%) rename {greenguard => draco}/pipelines/disabled/resample_normalize_dfs_xgb_classifier.json (100%) rename {greenguard => draco}/pipelines/disabled/resample_unstack_dfs_xgb_classifier.json (100%) rename {greenguard => draco}/pipelines/disabled/resample_unstack_double_lstm_timeseries_classifier.json (100%) rename {greenguard => draco}/pipelines/disabled/resample_unstack_lstm_timeseries_classifier.json (100%) rename {greenguard => draco}/pipelines/disabled/resample_unstack_normalize_dfs_xgb_classifier.json (100%) rename {greenguard => draco}/pipelines/probability/normalize_dfs_xgb_classifier.json (100%) rename {greenguard => draco}/pipelines/probability/unstack_dfs_xgb_classifier.json (100%) rename {greenguard => draco}/pipelines/probability/unstack_double_lstm_timeseries_classifier.json (100%) rename {greenguard => draco}/pipelines/probability/unstack_lstm_timeseries_classifier.json (100%) rename {greenguard => draco}/pipelines/probability/unstack_normalize_dfs_xgb_classifier.json (100%) rename {greenguard => draco}/pipelines/unstacked/unstacked_dfs_xgb_classifier.json (100%) 
rename {greenguard => draco}/pipelines/unstacked/unstacked_double_lstm_timeseries_classifier.json (100%) rename {greenguard => draco}/pipelines/unstacked/unstacked_lstm_timeseries_classifier.json (100%) rename {greenguard => draco}/pipelines/unstacked/unstacked_normalize_dfs_xgb_classifier.json (100%) rename {greenguard => draco}/primitives/numpy.take.json (100%) rename {greenguard => draco}/primitives/xgboost.XGBClassifier:probabilities.json (100%) rename {greenguard => draco}/results.py (100%) rename {greenguard => draco}/targets.py (100%) rename {greenguard => draco}/utils.py (100%) delete mode 100644 greenguard/loaders/__init__.py rename tutorials/{01_GreenGuard_Machine_Learning.ipynb => 01_Draco_Machine_Learning.ipynb} (89%) diff --git a/.github/ISSUE_TEMPLATE.md b/.github/ISSUE_TEMPLATE.md index dc283d2..8877ddd 100644 --- a/.github/ISSUE_TEMPLATE.md +++ b/.github/ISSUE_TEMPLATE.md @@ -1,4 +1,4 @@ -* GreenGuard version: +* Draco version: * Python version: * Operating System: diff --git a/.gitignore b/.gitignore index f0a4be1..fe2c47a 100644 --- a/.gitignore +++ b/.gitignore @@ -106,7 +106,7 @@ ENV/ # Vim .*.swp -greenguard/demo/ +draco/demo/ notebooks/ notebooks-private/ scripts/ diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 9f83841..e97e89e 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -38,8 +38,8 @@ and "help wanted" is open to whoever wants to implement it. Write Documentation ~~~~~~~~~~~~~~~~~~~ -GreenGuard could always use more documentation, whether as part of the -official GreenGuard docs, in docstrings, or even on the web in blog posts, +Draco could always use more documentation, whether as part of the +official Draco docs, in docstrings, or even on the web in blog posts, articles, and such. Submit Feedback @@ -57,18 +57,18 @@ If you are proposing a feature: Get Started! ------------ -Ready to contribute? Here's how to set up `GreenGuard` for local development. +Ready to contribute? Here's how to set up `Draco` for local development. -1. Fork the `GreenGuard` repo on GitHub. +1. Fork the `Draco` repo on GitHub. 2. Clone your fork locally:: - $ git clone git@github.com:your_name_here/GreenGuard.git + $ git clone git@github.com:your_name_here/Draco.git 3. Install your local copy into a virtualenv. Assuming you have virtualenvwrapper installed, this is how you set up your fork for local development:: - $ mkvirtualenv GreenGuard - $ cd GreenGuard/ + $ mkvirtualenv Draco + $ cd Draco/ $ make install-develop 4. Create a branch for local development:: @@ -133,9 +133,9 @@ All the Unit Tests should comply with the following requirements: 1. Unit Tests should be based only in unittest and pytest modules. -2. The tests that cover a module called ``greenguard/path/to/a_module.py`` +2. The tests that cover a module called ``draco/path/to/a_module.py`` should be implemented in a separated module called - ``tests/greenguard/path/to/test_a_module.py``. + ``tests/draco/path/to/test_a_module.py``. Note that the module name has the ``test_`` prefix and is located in a path similar to the one of the tested module, just inside the ``tests`` folder. @@ -165,7 +165,7 @@ Tips To run a subset of tests:: - $ python -m pytest tests.test_greenguard + $ python -m pytest tests.test_draco $ python -m pytest -k 'foo' Release Workflow @@ -175,11 +175,11 @@ The process of releasing a new version involves several steps combining both ``g ``bumpversion`` which, briefly: 1. Merge what is in ``master`` branch into ``stable`` branch. -2. 
Update the version in ``setup.cfg``, ``greenguard/__init__.py`` and
+2. Update the version in ``setup.cfg``, ``draco/__init__.py`` and
    ``HISTORY.md`` files.
 3. Create a new git tag pointing at the corresponding commit in ``stable`` branch.
 4. Merge the new commit from ``stable`` into ``master``.
-5. Update the version in ``setup.cfg`` and ``greenguard/__init__.py``
+5. Update the version in ``setup.cfg`` and ``draco/__init__.py``
    to open the next development iteration.

 .. note:: Before starting the process, make sure that ``HISTORY.md`` has been updated with a new
@@ -223,15 +223,15 @@ dependency specification, either in ``setup.py``::

     install_requires = [
         ...
-        'greenguard>=X.Y.Z.dev',
+        'draco>=X.Y.Z.dev',
         ...
     ]

 or in command line::

-    pip install 'greenguard>=X.Y.Z.dev'
+    pip install 'draco>=X.Y.Z.dev'

-.. _GitHub issues page: https://github.com/D3-AI/GreenGuard/issues
-.. _Travis Build Status page: https://travis-ci.org/D3-AI/GreenGuard/pull_requests
+.. _GitHub issues page: https://github.com/sintel-dev/Draco/issues
+.. _Travis Build Status page: https://travis-ci.org/sintel-dev/Draco/pull_requests
 .. _Google docstrings style: https://google.github.io/styleguide/pyguide.html?showone=Comments#Comments

diff --git a/DATABASE.md b/DATABASE.md
index 45cfd38..d3ef8f4 100644
--- a/DATABASE.md
+++ b/DATABASE.md
@@ -1,6 +1,6 @@
 # Database Schema

-The **GreenGuard Database** contains the following collections and relationships:
+The **Draco Database** contains the following collections and relationships:

 * Farm
 * Turbine

diff --git a/DATA_FORMAT.md b/DATA_FORMAT.md
index b0fc5a3..7354461 100644
--- a/DATA_FORMAT.md
+++ b/DATA_FORMAT.md
@@ -1,8 +1,8 @@
-# GreenGuard Data Format
+# Draco Data Format

 ## Input

-The minimum input expected by the **GreenGuard** system consists of the following two elements,
+The minimum input expected by the **Draco** system consists of the following two elements,
 which need to be passed as `pandas.DataFrame` objects:

 ### Target Times

@@ -60,7 +60,7 @@ an arbitrary number of additional fields.

 ## CSV Format

-As explained in a previous section, the input expected by the **GreenGuard** system consists of
+As explained in a previous section, the input expected by the **Draco** system consists of
 two tables which need to be passed as `pandas.DataFrame` objects:

 * The `target_times` table, which contains the specification of the problem that we are solving
 * The `readings` table, which contains the signal data from the different sensors, with the
   `turbine_id`, `signal_id`, `timestamp` and `value` fields.

 However, in most scenarios the size of the available data will far exceed the memory limitations
-of the system on which **GreenGuard** is being run, so loading all the data in a single
+of the system on which **Draco** is being run, so loading all the data in a single
 `pandas.DataFrame` will not be possible.

-In order to solve this situation, **GreenGuard** provides a [CSVLoader](
-https://d3-ai.github.io/GreenGuard/api/greenguard.loaders.csv.html#greenguard.loaders.csv.CSVLoader)
+In order to solve this situation, **Draco** provides a [CSVLoader](
+https://sintel-dev.github.io/Draco/api/draco.loaders.csv.html#draco.loaders.csv.CSVLoader)
 class which can be used to load data from what we call the **Raw Data Format**.
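To make the `CSVLoader` workflow concrete, here is a minimal sketch of how it is used in the project tutorials. The `readings` folder name, the pre-loaded `target_times` variable and the `'1d'` window size are illustrative assumptions, and the exact signature may differ between versions:

```python3
# A minimal sketch, assuming a `readings` folder in the Raw Data Format and a
# `target_times` DataFrame already loaded in memory (both are assumptions).
from draco.loaders import CSVLoader

csv_loader = CSVLoader('readings')

# Extract only the readings needed to cover each target, using a hypothetical
# 1-day window before every cutoff time; targets without enough readings are
# dropped from the returned target_times.
target_times, readings = csv_loader.load(target_times, '1d')
```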
### Raw Data Format

diff --git a/MANIFEST.in b/MANIFEST.in
index 4ebe1c6..0669023 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -4,7 +4,7 @@ include HISTORY.md
 include LICENSE
 include README.md

-recursive-include greenguard *.json
+recursive-include draco *.json
 recursive-include tests *
 recursive-exclude * __pycache__

diff --git a/Makefile b/Makefile
index 9a69d1a..a6ad0e5 100644
--- a/Makefile
+++ b/Makefile
@@ -89,10 +89,10 @@ install-minimum: ## install the minimum supported versions of the package depend

 # LINT TARGETS

-.PHONY: lint-greenguard
+.PHONY: lint-draco
-lint-btb: ## check style with flake8 and isort
-	flake8 greenguard
-	isort -c --recursive greenguard
+lint-draco: ## check style with flake8 and isort
+	flake8 draco
+	isort -c --recursive draco

 .PHONY: lint-tests
 lint-tests: ## check style with flake8 and isort

@@ -104,19 +104,19 @@ check-dependencies: ## test if there are any broken dependencies
 	pip check

 .PHONY: lint
-lint: check-dependencies lint-greenguard lint-tests ## Run all code style and static testing validations
+lint: check-dependencies lint-draco lint-tests ## Run all code style and static testing validations

 .PHONY: fix-lint
 fix-lint: ## fix lint issues using autoflake, autopep8, and isort
-	find greenguard -name '*.py' | xargs autoflake --in-place --remove-all-unused-imports --remove-unused-variables
-	autopep8 --in-place --recursive --aggressive greenguard
-	isort --apply --atomic --recursive greenguard tests
+	find draco -name '*.py' | xargs autoflake --in-place --remove-all-unused-imports --remove-unused-variables
+	autopep8 --in-place --recursive --aggressive draco
+	isort --apply --atomic --recursive draco tests

 # TEST TARGETS

 .PHONY: test-unit
 test-unit: ## run tests quickly with the default Python
-	python -m pytest --cov=greenguard
+	python -m pytest --cov=draco

 .PHONY: test-readme
 test-readme: ## run the readme snippets

@@ -141,7 +141,7 @@ test-all: ## run tests on every Python version with tox

 .PHONY: coverage
 coverage: ## check code coverage quickly with the default Python
-	coverage run --source greenguard -m pytest
+	coverage run --source draco -m pytest
 	coverage report -m
 	coverage html
 	$(BROWSER) htmlcov/index.html

@@ -272,7 +272,7 @@ release-candidate-test: check-clean check-master publish-test

 .PHONY: docker-build
 docker-build:
-	docker build -f docker/Dockerfile -t greenguard .
+	docker build -f docker/Dockerfile -t draco .

 .PHONY: docker-login
 docker-login:

@@ -280,8 +280,8 @@ docker-login:

 .PHONY: docker-push
 docker-push: docker-login docker-build
-	@$(eval VERSION := $(shell python -c 'import greenguard; print(greenguard.__version__)'))
-	docker tag greenguard signalsdev/greenguard:$(VERSION)
-	docker push signalsdev/greenguard:$(VERSION)
-	docker tag greenguard signalsdev/greenguard
-	docker push signalsdev/greenguard
+	@$(eval VERSION := $(shell python -c 'import draco; print(draco.__version__)'))
+	docker tag draco signalsdev/draco:$(VERSION)
+	docker push signalsdev/draco:$(VERSION)
+	docker tag draco signalsdev/draco
+	docker push signalsdev/draco

diff --git a/README.md b/README.md
index 0472817..70eb0fe 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,7 @@

-GreenGuard +Draco

@@ -12,23 +12,23 @@ AutoML for Renewable Energy Industries.

-[![PyPI Shield](https://img.shields.io/pypi/v/greenguard.svg)](https://pypi.python.org/pypi/greenguard)
-[![Travis CI Shield](https://travis-ci.org/signals-dev/GreenGuard.svg?branch=master)](https://travis-ci.org/signals-dev/GreenGuard)
-[![Downloads](https://pepy.tech/badge/greenguard)](https://pepy.tech/project/greenguard)
-[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/signals-dev/GreenGuard/master?filepath=tutorials)
+[![PyPI Shield](https://img.shields.io/pypi/v/draco-ml.svg)](https://pypi.python.org/pypi/draco-ml)
+[![Tests](https://github.com/sintel-dev/Draco/workflows/Run%20Tests/badge.svg)](https://github.com/sintel-dev/Draco/actions?query=workflow%3A%22Run+Tests%22+branch%3Amaster)
+[![Downloads](https://pepy.tech/badge/draco-ml)](https://pepy.tech/project/draco-ml)
+[![Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/sintel-dev/Draco/master?filepath=tutorials)

-# GreenGuard
+# Draco

-- License: [MIT](https://github.com/signals-dev/GreenGuard/blob/master/LICENSE)
-- Documentation: https://signals-dev.github.io/GreenGuard
-- Homepage: https://github.com/signals-dev/GreenGuard
+- License: [MIT](https://github.com/sintel-dev/Draco/blob/master/LICENSE)
+- Documentation: https://sintel-dev.github.io/Draco
+- Homepage: https://github.com/sintel-dev/Draco

 ## Overview

-The GreenGuard project is a collection of end-to-end solutions for machine learning problems
+The Draco project is a collection of end-to-end solutions for machine learning problems
 commonly found in monitoring wind energy production systems. Most tasks utilize sensor data
 emanating from monitoring systems. We utilize the foundational innovations developed for
 automation of machine learning at Data to AI Lab at MIT.

@@ -47,40 +47,35 @@ The salient aspects of this customized project are:

 ## Resources

 * [Data Format](DATA_FORMAT.md).
-* [GreenGuard folder structure](DATA_FORMAT.md#folder-structure).
+* [Draco folder structure](DATA_FORMAT.md#folder-structure).

 # Install

 ## Requirements

-**GreenGuard** has been developed and runs on Python 3.6, 3.7 and 3.8.
+**Draco** has been developed and runs on Python 3.6, 3.7 and 3.8.

 Also, although it is not strictly required, the usage of a [virtualenv](
 https://virtualenv.pypa.io/en/latest/) is highly recommended in order to avoid interfering
-with other software installed in the system where you are trying to run **GreenGuard**.
+with other software installed in the system where you are trying to run **Draco**.

 ## Download and Install

-**GreenGuard** can be installed locally using [pip](https://pip.pypa.io/en/stable/) with
+**Draco** can be installed locally using [pip](https://pip.pypa.io/en/stable/) with
 the following command:

 ```bash
-pip install greenguard
+pip install draco-ml
 ```

 This will pull and install the latest stable release from [PyPI](https://pypi.org/).

 If you want to install from source or contribute to the project please read the
-[Contributing Guide](https://signals-dev.github.io/GreenGuard/contributing.html#get-started).
-
-## Docker usage
-
-**GreenGuard** is prepared to be run inside a docker environment. Please check the
-[docker documentation](docker/README.md) for details about how to run **GreenGuard** using docker.
+[Contributing Guide](https://sintel-dev.github.io/Draco/contributing.html#get-started).
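As a quick sanity check after installing, the package can be imported and its version printed; a minimal sketch, where the version string simply reflects whichever release was installed:

```python3
# Verify the installation: the distribution is named draco-ml, but the
# importable package is draco.
import draco

print(draco.__version__)  # e.g. '0.0.1.dev0' for this development release
```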
# Data Format

-The minimum input expected by the **GreenGuard** system consists of the following two elements,
+The minimum input expected by the **Draco** system consists of the following two elements,
 which need to be passed as `pandas.DataFrame` objects:

 ## Target Times

@@ -138,23 +133,23 @@ an arbitrary number of additional fields.

 ## CSV Format

 Apart from the in-memory data format explained above, which is limited by the memory
-allocation capabilities of the system where it is run, **GreenGuard** is also prepared to
+allocation capabilities of the system where it is run, **Draco** is also prepared to
 load and work with data stored as a collection of CSV files, drastically increasing the amount
 of data which it can work with. Further details about this format can be found in the
 [project documentation site](DATA_FORMAT.md#csv-format).

 # Quickstart

-In this example we will load some demo data and classify it using a **GreenGuard Pipeline**.
+In this example we will load some demo data and classify it using a **Draco Pipeline**.

 ## 1. Load and split the demo data

 The first step is to load the demo data.

-For this, we will import and call the `greenguard.demo.load_demo` function without any arguments:
+For this, we will import and call the `draco.demo.load_demo` function without any arguments:

 ```python3
-from greenguard.demo import load_demo
+from draco.demo import load_demo

 target_times, readings = load_demo()
 ```

@@ -212,17 +207,17 @@ test_targets = test.pop('target')

 Once we have the data ready, we need to find a suitable pipeline.

-The list of available GreenGuard Pipelines can be obtained using the `greenguard.get_pipelines`
+The list of available Draco Pipelines can be obtained using the `draco.get_pipelines`
 function.

 ```python3
-from greenguard import get_pipelines
+from draco import get_pipelines

 pipelines = get_pipelines()
 ```

 The returned `pipeline` variable will be a `list` containing the names of all the pipelines
-available in the GreenGuard system:
+available in the Draco system:

 ```
 ['classes.unstack_double_lstm_timeseries_classifier',

@@ -244,13 +239,13 @@ pipeline_name = 'classes.normalize_dfs_xgb_classifier'

 Once we have loaded the data and selected the pipeline that we will use, we have to fit it.

-For this, we will create an instance of a `GreenGuardPipeline` object passing the name
+For this, we will create an instance of a `DracoPipeline` object passing the name
 of the pipeline that we want to use:

 ```python3
-from greenguard.pipeline import GreenGuardPipeline
+from draco.pipeline import DracoPipeline

-pipeline = GreenGuardPipeline(pipeline_name)
+pipeline = DracoPipeline(pipeline_name)
 ```

 And then we can directly fit it to our data by calling its `fit` method and passing in the

@@ -283,7 +278,7 @@ f1_score(test_targets, predictions)

 ## What's next?

-For more details about **GreenGuard** and all its possibilities and features, please check the
-[project documentation site](https://signals-dev.github.io/GreenGuard/)
+For more details about **Draco** and all its possibilities and features, please check the
+[project documentation site](https://sintel-dev.github.io/Draco/)

 Also do not forget to have a look at the [tutorials](
-https://github.com/signals-dev/GreenGuard/tree/master/tutorials)!
+https://github.com/sintel-dev/Draco/tree/master/tutorials)!

diff --git a/docs/Makefile b/docs/Makefile
index 5c762a7..e2106b7 100644
--- a/docs/Makefile
+++ b/docs/Makefile
@@ -4,7 +4,7 @@

 # You can set these variables from the command line.
 SPHINXOPTS    =
 SPHINXBUILD   = python -msphinx
-SPHINXPROJ    = greenguard
+SPHINXPROJ    = draco
 SOURCEDIR     = .
 BUILDDIR      = _build

diff --git a/docs/advanced_usage/concepts.md b/docs/advanced_usage/concepts.md
index f39bffa..2489b44 100644
--- a/docs/advanced_usage/concepts.md
+++ b/docs/advanced_usage/concepts.md
@@ -1,6 +1,6 @@
 # Concepts

-Here we briefly explain some of the concepts and terminology used within the GreenGuard
+Here we briefly explain some of the concepts and terminology used within the Draco
 project and documentation.

 ## Primitive

@@ -34,7 +34,7 @@ hyperparameters for a Template. Hence, Pipelines:
 hyperparameters of their template.

 A pipeline can be fitted and evaluated directly using [MLBlocks](
-https://hdi-project.github.io/MLBlocks), or using the **GreenGuardPipeline**.
+https://MLBazaar.github.io/MLBlocks), or using the **DracoPipeline**.

 ## Tuning

@@ -48,11 +48,11 @@ which hyperparameters are more likely to get the best results in the next iteration.
 We call each one of these evaluations a **tuning iteration**.

 The process of selecting and tuning the templates is handled by a [BTBSession](
-https://hdi-project.github.io/BTB/tutorials/03_Session.html), which is responsible for
+https://MLBazaar.github.io/BTB/tutorials/03_Session.html), which is responsible for
 discarding the templates that do not work on the given data and for keeping track
 of the template and hyperparameters that obtain the best performance.

-## GreenGuardPipeline
+## DracoPipeline

 This class is the one in charge of loading the **MLBlocks Pipelines**
 configured in the system and using them to learn from the data and make predictions.

diff --git a/docs/advanced_usage/csv.md b/docs/advanced_usage/csv.md
index c267807..eab0c19 100644
--- a/docs/advanced_usage/csv.md
+++ b/docs/advanced_usage/csv.md
@@ -1,6 +1,6 @@
 # CSV Format

-As explained in a previous section, the input expected by the **GreenGuard** system consists of
+As explained in a previous section, the input expected by the **Draco** system consists of
 two tables which need to be passed as `pandas.DataFrame` objects:

 * The `target_times` table, which contains the specification of the problem that we are solving

@@ -9,11 +9,11 @@ two tables which need to be passed as `pandas.DataFrame` objects:
   `turbine_id`, `signal_id`, `timestamp` and `value` fields.

 However, in most scenarios the size of the available data will far exceed the memory limitations
-of the system on which **GreenGuard** is being run, so loading all the data in a single
+of the system on which **Draco** is being run, so loading all the data in a single
 `pandas.DataFrame` will not be possible.

-In order to solve this situation, **GreenGuard** provides a [CSVLoader](
-https://d3-ai.github.io/GreenGuard/api/greenguard.loaders.csv.html#greenguard.loaders.csv.CSVLoader)
+In order to solve this situation, **Draco** provides a [CSVLoader](
+https://sintel-dev.github.io/Draco/api/draco.loaders.csv.html#draco.loaders.csv.CSVLoader)
 class which can be used to load data from what we call the **Raw Data Format**.
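The tutorials also show the `CSVLoader` resampling the readings while they are being loaded, which is what keeps larger-than-memory datasets tractable. A minimal sketch, assuming the `rule` and `aggregation` arguments that appear in the tutorial logs (`Resampling: 4h - mean`); argument names may differ between versions:

```python3
# Sketch of loading with time-based resampling: readings are aggregated into
# 4-hour buckets using the mean, drastically reducing the number of rows.
from draco.loaders import CSVLoader

csv_loader = CSVLoader(
    'readings',          # folder in the Raw Data Format (assumed to exist)
    rule='4h',           # resampling rule, as a pandas offset alias
    aggregation='mean',  # aggregation applied within each bucket
)

# The '14d' window size is a hypothetical choice for this sketch.
target_times, readings = csv_loader.load(target_times, '14d')
```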
## Raw Data Format

diff --git a/docs/advanced_usage/docker.md b/docs/advanced_usage/docker.md
index e5603df..a2cb198 100644
--- a/docs/advanced_usage/docker.md
+++ b/docs/advanced_usage/docker.md
@@ -1,34 +1,34 @@
 # Docker Usage

-**GreenGuard** comes configured and ready to be distributed and run as a docker image which starts
-a jupyter notebook already configured to use greenguard, with all the required dependencies already
+**Draco** comes configured and ready to be distributed and run as a docker image which starts
+a jupyter notebook already configured to use draco, with all the required dependencies already
 installed.

 ## Requirements

-The only requirement in order to run the GreenGuard Docker image is to have Docker installed and
+The only requirement in order to run the Draco Docker image is to have Docker installed and
 that the user has enough permissions to run it.

 Installation instructions for any compatible system can be found [here](https://docs.docker.com/install/)

-Additionally, the system that builds the GreenGuard Docker image will also need to have a working
+Additionally, the system that builds the Draco Docker image will also need to have a working
 internet connection that allows downloading the base image and the additional python dependencies.

-## Building the GreenGuard Docker Image
+## Building the Draco Docker Image

-After having cloned the **GreenGuard** repository, all you have to do in order to build the GreenGuard Docker
+After having cloned the **Draco** repository, all you have to do in order to build the Draco Docker
 Image is running this command:

 ```bash
 make docker-jupyter-build
 ```

-After a few minutes, the new image, called `greenguard-jupyter`, will have been built into the system
+After a few minutes, the new image, called `draco-jupyter`, will have been built into the system
 and will be ready to be used or distributed.

-## Distributing the GreenGuard Docker Image
+## Distributing the Draco Docker Image

-Once the `greenguard-jupyter` image is built, it can be distributed in several ways.
+Once the `draco-jupyter` image is built, it can be distributed in several ways.

 ### Distributing using a Docker registry

@@ -38,7 +38,7 @@ In order to do so, we will need to have write access to a public or private registry
 [login](https://docs.docker.com/engine/reference/commandline/login/)!) and execute these commands:

 ```bash
-docker tag greenguard-jupyter:latest your-registry-name:some-tag
+docker tag draco-jupyter:latest your-registry-name:some-tag
 docker push your-registry-name:some-tag
 ```

 Afterwards, in the receiving machine:

 ```bash
 docker pull your-registry-name:some-tag
-docker tag your-registry-name:some-tag greenguard-jupyter:latest
+docker tag your-registry-name:some-tag draco-jupyter:latest
 ```

 ### Distributing as a file

@@ -57,28 +57,28 @@ using the following command.

 In the system that already has the image:

 ```bash
-docker save --output greenguard-jupyter.tar greenguard-jupyter
+docker save --output draco-jupyter.tar draco-jupyter
 ```

-Then copy over the file `greenguard-jupyter.tar` to the new system and there, run:
+Then copy over the file `draco-jupyter.tar` to the new system and there, run:

 ```bash
-docker load --input greenguard-jupyter.tar
+docker load --input draco-jupyter.tar
 ```

-After these commands, the `greenguard-jupyter` image should be available and ready to be used in the
+After these commands, the `draco-jupyter` image should be available and ready to be used in the
 new system.
-## Running the greenguard-jupyter image +## Running the draco-jupyter image -Once the `greenguard-jupyter` image has been built, pulled or loaded, it is ready to be run. +Once the `draco-jupyter` image has been built, pulled or loaded, it is ready to be run. This can be done in two ways: -### Running greenguard-jupyter with the code +### Running draco-jupyter with the code -If the GreenGuard source code is available in the system, running the image is as simple as running +If the Draco source code is available in the system, running the image is as simple as running this command from within the root of the project: ```bash @@ -93,13 +93,13 @@ which means that any changes that you do in your local code will immediately be within your notebooks, and that any notebook that you create within jupyter will also show up in your `notebooks` folder! -### Running greenguard-jupyter without the greenguard code +### Running draco-jupyter without the draco code -If the GreenGuard source code is not available in the system and only the Docker Image is, you can +If the Draco source code is not available in the system and only the Docker Image is, you can still run the image by using this command: ```bash -docker run -ti -p8888:8888 greenguard-jupyter +docker run -ti -p8888:8888 draco-jupyter ``` In this case, the code changes and the notebooks that you create within jupyter will stay diff --git a/docs/conf.py b/docs/conf.py index 9e23c07..ecd0023 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- # -# GreenGuard documentation build configuration file, created by +# Draco documentation build configuration file, created by # sphinx-quickstart on Fri Jun 9 13:47:02 2017. # # This file is execfile()d with the current directory set to its @@ -20,7 +20,7 @@ import sphinx_rtd_theme # For read the docs theme -import greenguard +import draco # -- General configuration --------------------------------------------- @@ -58,22 +58,22 @@ nbsphinx_execute = 'never' # General information about the project. -project = 'GreenGuard' -slug = 'greenguard' +project = 'Draco' +slug = 'draco' title = project + ' Documentation', copyright = '2018, MIT Data To AI Lab' author = 'MIT Data To AI Lab' description = 'AutoML for Renewable Energy Industries' -user = 'D3-AI' +user = 'sintel-dev' # The version info for the project you're documenting, acts as replacement # for |version| and |release|, also used in various other places throughout # the built documents. # # The short X.Y version. -version = greenguard.__version__ +version = draco.__version__ # The full version, including alpha/beta/rc tags. -release = greenguard.__version__ +release = draco.__version__ # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -127,13 +127,13 @@ # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. # html_favicon = 'images/favicon.ico' -html_favicon = 'images/GreenGuard.ico' +html_favicon = 'images/Draco.ico' # If given, this must be the name of an image file (path relative to the # configuration directory) that is the logo of the docs. It is placed at # the top of the sidebar; its width should therefore not exceed 200 pixels. 
# html_logo = 'images/dai-logo.png'
-html_logo = 'images/GreenGuard-200.png'
+html_logo = 'images/Draco-200.png'

 # -- Options for HTMLHelp output ---------------------------------------

diff --git a/docs/images/GreenGuard-200.png b/docs/images/Draco-200.png
similarity index 100%
rename from docs/images/GreenGuard-200.png
rename to docs/images/Draco-200.png
diff --git a/docs/images/GreenGuard.ico b/docs/images/Draco.ico
similarity index 100%
rename from docs/images/GreenGuard.ico
rename to docs/images/Draco.ico
diff --git a/docs/images/GreenGuard.png b/docs/images/Draco.png
similarity index 100%
rename from docs/images/GreenGuard.png
rename to docs/images/Draco.png
diff --git a/docs/index.rst b/docs/index.rst
index dad6c5f..75b0cdb 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -10,7 +10,7 @@
     :caption: Tutorials
     :hidden:

-    tutorials/01_GreenGuard_Quickstart
+    tutorials/01_Draco_Quickstart
     tutorials/02_Extract_Readings

 .. toctree::

@@ -25,7 +25,7 @@
     :caption: Resources
     :hidden:

-    API Reference <api/greenguard>
+    API Reference <api/draco>
     contributing
     authors
     history

diff --git a/docs/make.bat b/docs/make.bat
index ad6474a..b427863 100644
--- a/docs/make.bat
+++ b/docs/make.bat
@@ -9,7 +9,7 @@ if "%SPHINXBUILD%" == "" (
 )
 set SOURCEDIR=.
 set BUILDDIR=_build
-set SPHINXPROJ=greenguard
+set SPHINXPROJ=draco

 if "%1" == "" goto help

diff --git a/greenguard/__init__.py b/draco/__init__.py
similarity index 67%
rename from greenguard/__init__.py
rename to draco/__init__.py
index 4d310c6..885fac1 100644
--- a/greenguard/__init__.py
+++ b/draco/__init__.py
@@ -1,14 +1,14 @@
 # -*- coding: utf-8 -*-

-"""Top-level package for GreenGuard."""
+"""Top-level package for Draco."""

 __author__ = """MIT Data To AI Lab"""
 __email__ = 'dailabmit@gmail.com'
-__version__ = '0.3.1.dev0'
+__version__ = '0.0.1.dev0'

 import os

-from greenguard.pipeline import GreenGuardPipeline, get_pipelines
+from draco.pipeline import DracoPipeline, get_pipelines

 _BASE_PATH = os.path.abspath(os.path.dirname(__file__))
 MLBLOCKS_PIPELINES = os.path.join(_BASE_PATH, 'pipelines')

@@ -16,6 +16,6 @@

 __all__ = (
-    'GreenGuardPipeline',
+    'DracoPipeline',
     'get_pipelines',
 )

diff --git a/greenguard/benchmark.py b/draco/benchmark.py
similarity index 97%
rename from greenguard/benchmark.py
rename to draco/benchmark.py
index f638138..712bc6e 100644
--- a/greenguard/benchmark.py
+++ b/draco/benchmark.py
@@ -14,13 +14,12 @@
 from sklearn.model_selection import train_test_split
 from tqdm import tqdm

-from greenguard import get_pipelines
-from greenguard.demo import load_demo
-from greenguard.loaders import CSVLoader
-from greenguard.metrics import (METRICS, accuracy_score, f1_score,
-                                fpr_score, tpr_score, threshold_score)
-from greenguard.pipeline import GreenGuardPipeline, generate_init_params, generate_preprocessing
-from greenguard.results import load_results, write_results
+from draco import get_pipelines
+from draco.demo import load_demo
+from draco.loaders import CSVLoader
+from draco.metrics import METRICS, accuracy_score, f1_score, fpr_score, tpr_score, threshold_score
+from draco.pipeline import DracoPipeline, generate_init_params, generate_preprocessing
+from draco.results import load_results, write_results

 LOGGER = logging.getLogger(__name__)

@@ -134,7 +133,7 @@ def evaluate_template(
         metric (function or str):
             Metric to use. If an ``str`` is given it must be one of the metrics
-            defined in the ``greenguard.metrics.METRICS`` dictionary.
+            defined in the ``draco.metrics.METRICS`` dictionary.
         tuning_iterations (int):
             Number of iterations to be used.
         preprocessing (int, list or dict):

@@ -164,7 +163,7 @@ def evaluate_template(

     train, test = train_test_split(target_times, test_size=test_size, random_state=random_state)

-    pipeline = GreenGuardPipeline(
+    pipeline = DracoPipeline(
         template,
         metric=tuning_metric,
         cost=cost,

@@ -286,7 +285,7 @@ def evaluate_templates(
             List of tuples (int, str or Timedelta object).
         metric (function or str):
             Metric to use. If an ``str`` is given it must be one of the metrics
-            defined in the ``greenguard.metrics.METRICS`` dictionary.
+            defined in the ``draco.metrics.METRICS`` dictionary.
         tuning_iterations (int):
             Number of iterations to be used.
         init_params (dict):

@@ -625,7 +624,7 @@ def run_benchmark(templates, problems, window_size_resample_rule=None,
             Defaults to ``None``.
         metric (function or str):
             Metric to use. If an ``str`` is given it must be one of the metrics
-            defined in the ``greenguard.metrics.METRICS`` dictionary.
+            defined in the ``draco.metrics.METRICS`` dictionary.
         cost (bool):
             Whether the metric is a cost function (the lower the better) or not.
             Defaults to ``False``.

@@ -826,13 +825,13 @@ def _make_problems(args):

 def _get_parser():
-    parser = argparse.ArgumentParser(description='GreenGuard Benchmark Command Line Interface.')
+    parser = argparse.ArgumentParser(description='Draco Benchmark Command Line Interface.')
     parser.set_defaults(action=None)
     action = parser.add_subparsers(title='action')
     action.required = True

     # Run action
-    run = action.add_parser('run', help='Run the GreenGuard Benchmark')
+    run = action.add_parser('run', help='Run the Draco Benchmark')
     run.set_defaults(action=_run)
     run.set_defaults(user=None)

@@ -878,13 +877,13 @@ def _get_parser():

     # Summarize action
     summary = action.add_parser('summarize-results',
-                                help='Summarize the GreenGuard Benchmark results')
+                                help='Summarize the Draco Benchmark results')
     summary.set_defaults(action=_summarize_results)
     summary.add_argument('input', nargs='+', help='Input path with results.')
     summary.add_argument('output', help='Output file.')

     # Make problems action
-    problems = action.add_parser('make-problems', help='Create GreenGuard problems')
+    problems = action.add_parser('make-problems', help='Create Draco problems')
     problems.set_defaults(action=_make_problems)
     problems.add_argument('target-times-paths', nargs='+', help='List of target times paths.')
     problems.add_argument('readings-path', type=str, help='Path to the readings folder.')

diff --git a/greenguard/db.py b/draco/db.py
similarity index 97%
rename from greenguard/db.py
rename to draco/db.py
index 99da2fd..053e01b 100644
--- a/greenguard/db.py
+++ b/draco/db.py
@@ -7,7 +7,7 @@

 from pymongo import MongoClient

-from greenguard.utils import remove_dots, restore_dots
+from draco.utils import remove_dots, restore_dots

 LOGGER = logging.getLogger(__name__)

diff --git a/greenguard/demo.py b/draco/demo.py
similarity index 95%
rename from greenguard/demo.py
rename to draco/demo.py
index 429e0e9..f7848e5 100644
--- a/greenguard/demo.py
+++ b/draco/demo.py
@@ -28,10 +28,10 @@

 def load_demo(load_readings=True):
-    """Load the demo included in the GreenGuard project.
+    """Load the demo included in the Draco project.

     The first time that this function is executed, the data will be downloaded
-    and cached inside the `greenguard/demo` folder.
+    and cached inside the `draco/demo` folder.
     Subsequent calls will load the cached data instead of downloading it again.
Returns: diff --git a/draco/loaders/__init__.py b/draco/loaders/__init__.py new file mode 100644 index 0000000..5f25f4c --- /dev/null +++ b/draco/loaders/__init__.py @@ -0,0 +1,5 @@ +from draco.loaders.csv import CSVLoader + +__all__ = ( + 'CSVLoader', +) diff --git a/greenguard/loaders/csv.py b/draco/loaders/csv.py similarity index 99% rename from greenguard/loaders/csv.py rename to draco/loaders/csv.py index 97d33ee..d50229c 100644 --- a/greenguard/loaders/csv.py +++ b/draco/loaders/csv.py @@ -4,7 +4,7 @@ import dask import pandas as pd -from greenguard.targets import drop_duplicates, select_valid_targets +from draco.targets import drop_duplicates, select_valid_targets LOGGER = logging.getLogger(__name__) diff --git a/greenguard/metrics.py b/draco/metrics.py similarity index 100% rename from greenguard/metrics.py rename to draco/metrics.py diff --git a/greenguard/pipeline.py b/draco/pipeline.py similarity index 97% rename from greenguard/pipeline.py rename to draco/pipeline.py index 2a9cd84..b50567b 100644 --- a/greenguard/pipeline.py +++ b/draco/pipeline.py @@ -20,7 +20,7 @@ from sklearn.exceptions import NotFittedError from sklearn.model_selection import KFold, StratifiedKFold -from greenguard.metrics import METRICS +from draco.metrics import METRICS LOGGER = logging.getLogger(__name__) @@ -152,11 +152,11 @@ def generate_preprocessing(templates_names, preprocessing): SELF_THRESHOLD = object() -class GreenGuardPipeline(object): - """Main Machine Learning component in the GreenGuard project. +class DracoPipeline(object): + """Main Machine Learning component in the Draco project. - The ``GreenGuardPipeline`` represents the abstraction of a Machine - Learning pipeline architecture specialized on the GreenGuard data + The ``DracoPipeline`` represents the abstraction of a Machine + Learning pipeline architecture specialized on the Draco data format. In order to use it, an MLBlocks pipeline template needs to be given, @@ -169,7 +169,7 @@ class GreenGuardPipeline(object): template_name: Name of the template being used. fitted (bool): - Whether this GreenGuardPipeline has already been fitted or not. + Whether this DracoPipeline has already been fitted or not. steps (list): List of primitives that compose this template. preprocessing (list): @@ -192,7 +192,7 @@ class GreenGuardPipeline(object): ``MLPipeline``. Also can be a list combining both. metric (str or function): Metric to use. If an ``str`` is given it must be one of the metrics - defined in the ``greenguard.metrics.METRICS`` dictionary. + defined in the ``draco.metrics.METRICS`` dictionary. cost (bool): Whether the metric is a cost function (the lower the better) or not. Defaults to ``False``. @@ -622,8 +622,8 @@ def load(cls, path): Path to the file where the pipeline is saved. Returns: - GreenGuardPipeline: - Loaded GreenGuardPipeline instance. + DracoPipeline: + Loaded DracoPipeline instance. 
""" with open(path, 'rb') as pickle_file: return cloudpickle.load(pickle_file) diff --git a/greenguard/pipelines/classes/normalize_dfs_xgb_classifier.json b/draco/pipelines/classes/normalize_dfs_xgb_classifier.json similarity index 100% rename from greenguard/pipelines/classes/normalize_dfs_xgb_classifier.json rename to draco/pipelines/classes/normalize_dfs_xgb_classifier.json diff --git a/greenguard/pipelines/classes/unstack_dfs_xgb_classifier.json b/draco/pipelines/classes/unstack_dfs_xgb_classifier.json similarity index 100% rename from greenguard/pipelines/classes/unstack_dfs_xgb_classifier.json rename to draco/pipelines/classes/unstack_dfs_xgb_classifier.json diff --git a/greenguard/pipelines/classes/unstack_double_lstm_timeseries_classifier.json b/draco/pipelines/classes/unstack_double_lstm_timeseries_classifier.json similarity index 100% rename from greenguard/pipelines/classes/unstack_double_lstm_timeseries_classifier.json rename to draco/pipelines/classes/unstack_double_lstm_timeseries_classifier.json diff --git a/greenguard/pipelines/classes/unstack_lstm_timeseries_classifier.json b/draco/pipelines/classes/unstack_lstm_timeseries_classifier.json similarity index 100% rename from greenguard/pipelines/classes/unstack_lstm_timeseries_classifier.json rename to draco/pipelines/classes/unstack_lstm_timeseries_classifier.json diff --git a/greenguard/pipelines/classes/unstack_normalize_dfs_xgb_classifier.json b/draco/pipelines/classes/unstack_normalize_dfs_xgb_classifier.json similarity index 100% rename from greenguard/pipelines/classes/unstack_normalize_dfs_xgb_classifier.json rename to draco/pipelines/classes/unstack_normalize_dfs_xgb_classifier.json diff --git a/greenguard/pipelines/disabled/dfs_xgb_classifier.json b/draco/pipelines/disabled/dfs_xgb_classifier.json similarity index 100% rename from greenguard/pipelines/disabled/dfs_xgb_classifier.json rename to draco/pipelines/disabled/dfs_xgb_classifier.json diff --git a/greenguard/pipelines/disabled/normalize_dfs_xgb_classifier.json b/draco/pipelines/disabled/normalize_dfs_xgb_classifier.json similarity index 100% rename from greenguard/pipelines/disabled/normalize_dfs_xgb_classifier.json rename to draco/pipelines/disabled/normalize_dfs_xgb_classifier.json diff --git a/greenguard/pipelines/disabled/resample_dfs_xgb_classifier.json b/draco/pipelines/disabled/resample_dfs_xgb_classifier.json similarity index 100% rename from greenguard/pipelines/disabled/resample_dfs_xgb_classifier.json rename to draco/pipelines/disabled/resample_dfs_xgb_classifier.json diff --git a/greenguard/pipelines/disabled/resample_normalize_dfs_xgb_classifier.json b/draco/pipelines/disabled/resample_normalize_dfs_xgb_classifier.json similarity index 100% rename from greenguard/pipelines/disabled/resample_normalize_dfs_xgb_classifier.json rename to draco/pipelines/disabled/resample_normalize_dfs_xgb_classifier.json diff --git a/greenguard/pipelines/disabled/resample_unstack_dfs_xgb_classifier.json b/draco/pipelines/disabled/resample_unstack_dfs_xgb_classifier.json similarity index 100% rename from greenguard/pipelines/disabled/resample_unstack_dfs_xgb_classifier.json rename to draco/pipelines/disabled/resample_unstack_dfs_xgb_classifier.json diff --git a/greenguard/pipelines/disabled/resample_unstack_double_lstm_timeseries_classifier.json b/draco/pipelines/disabled/resample_unstack_double_lstm_timeseries_classifier.json similarity index 100% rename from greenguard/pipelines/disabled/resample_unstack_double_lstm_timeseries_classifier.json rename to 
draco/pipelines/disabled/resample_unstack_double_lstm_timeseries_classifier.json diff --git a/greenguard/pipelines/disabled/resample_unstack_lstm_timeseries_classifier.json b/draco/pipelines/disabled/resample_unstack_lstm_timeseries_classifier.json similarity index 100% rename from greenguard/pipelines/disabled/resample_unstack_lstm_timeseries_classifier.json rename to draco/pipelines/disabled/resample_unstack_lstm_timeseries_classifier.json diff --git a/greenguard/pipelines/disabled/resample_unstack_normalize_dfs_xgb_classifier.json b/draco/pipelines/disabled/resample_unstack_normalize_dfs_xgb_classifier.json similarity index 100% rename from greenguard/pipelines/disabled/resample_unstack_normalize_dfs_xgb_classifier.json rename to draco/pipelines/disabled/resample_unstack_normalize_dfs_xgb_classifier.json diff --git a/greenguard/pipelines/probability/normalize_dfs_xgb_classifier.json b/draco/pipelines/probability/normalize_dfs_xgb_classifier.json similarity index 100% rename from greenguard/pipelines/probability/normalize_dfs_xgb_classifier.json rename to draco/pipelines/probability/normalize_dfs_xgb_classifier.json diff --git a/greenguard/pipelines/probability/unstack_dfs_xgb_classifier.json b/draco/pipelines/probability/unstack_dfs_xgb_classifier.json similarity index 100% rename from greenguard/pipelines/probability/unstack_dfs_xgb_classifier.json rename to draco/pipelines/probability/unstack_dfs_xgb_classifier.json diff --git a/greenguard/pipelines/probability/unstack_double_lstm_timeseries_classifier.json b/draco/pipelines/probability/unstack_double_lstm_timeseries_classifier.json similarity index 100% rename from greenguard/pipelines/probability/unstack_double_lstm_timeseries_classifier.json rename to draco/pipelines/probability/unstack_double_lstm_timeseries_classifier.json diff --git a/greenguard/pipelines/probability/unstack_lstm_timeseries_classifier.json b/draco/pipelines/probability/unstack_lstm_timeseries_classifier.json similarity index 100% rename from greenguard/pipelines/probability/unstack_lstm_timeseries_classifier.json rename to draco/pipelines/probability/unstack_lstm_timeseries_classifier.json diff --git a/greenguard/pipelines/probability/unstack_normalize_dfs_xgb_classifier.json b/draco/pipelines/probability/unstack_normalize_dfs_xgb_classifier.json similarity index 100% rename from greenguard/pipelines/probability/unstack_normalize_dfs_xgb_classifier.json rename to draco/pipelines/probability/unstack_normalize_dfs_xgb_classifier.json diff --git a/greenguard/pipelines/unstacked/unstacked_dfs_xgb_classifier.json b/draco/pipelines/unstacked/unstacked_dfs_xgb_classifier.json similarity index 100% rename from greenguard/pipelines/unstacked/unstacked_dfs_xgb_classifier.json rename to draco/pipelines/unstacked/unstacked_dfs_xgb_classifier.json diff --git a/greenguard/pipelines/unstacked/unstacked_double_lstm_timeseries_classifier.json b/draco/pipelines/unstacked/unstacked_double_lstm_timeseries_classifier.json similarity index 100% rename from greenguard/pipelines/unstacked/unstacked_double_lstm_timeseries_classifier.json rename to draco/pipelines/unstacked/unstacked_double_lstm_timeseries_classifier.json diff --git a/greenguard/pipelines/unstacked/unstacked_lstm_timeseries_classifier.json b/draco/pipelines/unstacked/unstacked_lstm_timeseries_classifier.json similarity index 100% rename from greenguard/pipelines/unstacked/unstacked_lstm_timeseries_classifier.json rename to draco/pipelines/unstacked/unstacked_lstm_timeseries_classifier.json diff --git 
a/greenguard/pipelines/unstacked/unstacked_normalize_dfs_xgb_classifier.json b/draco/pipelines/unstacked/unstacked_normalize_dfs_xgb_classifier.json
similarity index 100%
rename from greenguard/pipelines/unstacked/unstacked_normalize_dfs_xgb_classifier.json
rename to draco/pipelines/unstacked/unstacked_normalize_dfs_xgb_classifier.json
diff --git a/greenguard/primitives/numpy.take.json b/draco/primitives/numpy.take.json
similarity index 100%
rename from greenguard/primitives/numpy.take.json
rename to draco/primitives/numpy.take.json
diff --git a/greenguard/primitives/xgboost.XGBClassifier:probabilities.json b/draco/primitives/xgboost.XGBClassifier:probabilities.json
similarity index 100%
rename from greenguard/primitives/xgboost.XGBClassifier:probabilities.json
rename to draco/primitives/xgboost.XGBClassifier:probabilities.json
diff --git a/greenguard/results.py b/draco/results.py
similarity index 100%
rename from greenguard/results.py
rename to draco/results.py
diff --git a/greenguard/targets.py b/draco/targets.py
similarity index 100%
rename from greenguard/targets.py
rename to draco/targets.py
diff --git a/greenguard/utils.py b/draco/utils.py
similarity index 100%
rename from greenguard/utils.py
rename to draco/utils.py
diff --git a/greenguard/loaders/__init__.py b/greenguard/loaders/__init__.py
deleted file mode 100644
index 0113f15..0000000
--- a/greenguard/loaders/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from greenguard.loaders.csv import CSVLoader
-
-__all__ = (
-    'CSVLoader',
-)
diff --git a/setup.cfg b/setup.cfg
index ee6b598..d2829c0 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.3.1.dev0
+current_version = 0.0.1.dev0
 commit = True
 tag = True
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?
@@ -20,7 +20,7 @@ values = search = version='{current_version}' replace = version='{new_version}' -[bumpversion:file:greenguard/__init__.py] +[bumpversion:file:draco/__init__.py] search = __version__ = '{current_version}' replace = __version__ = '{new_version}' diff --git a/setup.py b/setup.py index 79eeb5b..1305aaf 100644 --- a/setup.py +++ b/setup.py @@ -57,6 +57,7 @@ 'nbsphinx>=0.5.0,<0.7', 'Sphinx>=1.7.1,<3', 'sphinx_rtd_theme>=0.2.4,<0.5', + 'docutils>=0.14,<0.18', 'autodocsumm>=0.1.10', # style check @@ -93,8 +94,8 @@ description='AutoML for Renewable Energy Industries.', entry_points={ 'mlblocks': [ - 'pipelines=greenguard:MLBLOCKS_PIPELINES', - 'primitives=greenguard:MLBLOCKS_PRIMITIVES' + 'pipelines=draco:MLBLOCKS_PIPELINES', + 'primitives=draco:MLBLOCKS_PRIMITIVES' ], }, extras_require={ @@ -103,17 +104,17 @@ }, include_package_data=True, install_requires=install_requires, - keywords='wind machine learning greenguard', + keywords='wind machine learning draco', license='MIT license', long_description=readme + '\n\n' + history, long_description_content_type='text/markdown', - name='greenguard', - packages=find_packages(include=['greenguard', 'greenguard.*']), + name='draco-ml', + packages=find_packages(include=['draco', 'draco.*']), python_requires='>=3.6,<3.9', setup_requires=setup_requires, test_suite='tests', tests_require=tests_require, - url='https://github.com/D3-AI/GreenGuard', - version='0.3.1.dev0', + url='https://github.com/sintel-dev/Draco', + version='0.0.1.dev0', zip_safe=False, ) diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index ac87cc2..5d6f116 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -1,8 +1,8 @@ -"""Tests for `greenguard.benchmark` module.""" +"""Tests for `draco.benchmark` module.""" import numpy as np -from greenguard.benchmark import evaluate_templates -from greenguard.demo import load_demo +from draco.benchmark import evaluate_templates +from draco.demo import load_demo def test_predict(): diff --git a/tests/test_metrics.py b/tests/test_metrics.py index ce14132..a942669 100644 --- a/tests/test_metrics.py +++ b/tests/test_metrics.py @@ -1,6 +1,6 @@ import numpy as np -from greenguard.metrics import fpr_score +from draco.metrics import fpr_score def test_fpr_score_perfect_scenario(): diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 541ad6f..725d299 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -1,16 +1,16 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -"""Tests for `greenguard.pipeline` module.""" +"""Tests for `draco.pipeline` module.""" from unittest import TestCase from unittest.mock import patch import pandas as pd -from greenguard.pipeline import GreenGuardPipeline +from draco.pipeline import DracoPipeline -class TestGreenGuardPipeline(TestCase): +class TestDracoPipeline(TestCase): def _get_data(self): target_times = pd.DataFrame({ @@ -26,26 +26,26 @@ def _get_data(self): }) return target_times, readings - @patch('greenguard.pipeline.MLPipeline') - @patch('greenguard.pipeline.load_pipeline') + @patch('draco.pipeline.MLPipeline') + @patch('draco.pipeline.load_pipeline') def test_fit(self, load_pipeline_mock, mlpipeline_mock): load_pipeline_mock.return_value = dict() # Run - instance = GreenGuardPipeline('a_pipeline', 'accuracy') + instance = DracoPipeline('a_pipeline', 'accuracy') target_times, readings = self._get_data() instance.fit(target_times, readings) # Asserts assert instance.fitted - @patch('greenguard.pipeline.MLPipeline') - 
@patch('greenguard.pipeline.load_pipeline') + @patch('draco.pipeline.MLPipeline') + @patch('draco.pipeline.load_pipeline') def test_predict(self, load_pipeline_mock, mlpipeline_mock): load_pipeline_mock.return_value = dict() # Run - instance = GreenGuardPipeline('a_pipeline', 'accuracy') + instance = DracoPipeline('a_pipeline', 'accuracy') instance.fitted = True target_times, readings = self._get_data() instance.predict(target_times, readings) diff --git a/tutorials/01_GreenGuard_Machine_Learning.ipynb b/tutorials/01_Draco_Machine_Learning.ipynb similarity index 89% rename from tutorials/01_GreenGuard_Machine_Learning.ipynb rename to tutorials/01_Draco_Machine_Learning.ipynb index 03a2aa0..4a5fde7 100644 --- a/tutorials/01_GreenGuard_Machine_Learning.ipynb +++ b/tutorials/01_Draco_Machine_Learning.ipynb @@ -4,21 +4,21 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# GreenGuard Machine Learning" + "# Draco Machine Learning" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "In this tutorial we will show you how to use GreenGuard to solve a Machine Learning problem\n", + "In this tutorial we will show you how to use Draco to solve a Machine Learning problem\n", "defined via a Target Times table.\n", "\n", "During the next steps we will:\n", "\n", "- Load demo target times and readings\n", "- Find available pipelines and load two of them as templates\n", - "- Use GreenGuard AutoML to select the best template and hyperparameters for our problem\n", + "- Use Draco AutoML to select the best template and hyperparameters for our problem\n", "- Build and fit a Machine Learning pipeline based on the found template and hyperparameters\n", "- Make predictions using the fitted pipeline\n", "- Evaluate how good the predictions are" @@ -31,7 +31,7 @@ "## 0. Setup the logging\n", "\n", "This step sets up logging in our environment to increase our visibility over\n", - "the steps that GreenGuard performs." + "the steps that Draco performs." ] }, { @@ -57,7 +57,7 @@ "\n", "The first step is to load the data that we are going to use.\n", "\n", - "In order to use the demo data included in GreenGuard, the `greenguard.demo.load_demo` function can be used." + "In order to use the demo data included in Draco, the `draco.demo.load_demo` function can be used." ] }, { @@ -66,7 +66,7 @@ "metadata": {}, "outputs": [], "source": [ - "from greenguard.demo import load_demo\n", + "from draco.demo import load_demo\n", "\n", "target_times, readings = load_demo()" ] @@ -75,11 +75,11 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This will download some demo data from [GreenGuard S3 demo Bucket](\n", - "https://d3-ai-greenguard.s3.amazonaws.com/index.html)%20and%20load%20it%20as/n", + "This will download some demo data from [Draco S3 demo Bucket](\n", + "https://d3-ai-draco.s3.amazonaws.com/index.html)%20and%20load%20it%20as/n", "the necessary `target_times` and `readings` tables.\n", "\n", - "The exact format of these tables is described in the GreenGuard README and docs:" + "The exact format of these tables is described in the Draco README and docs:" ] }, { @@ -399,11 +399,11 @@ "## 3. 
Finding the available Templates\n", "\n", "The next step will be to select a collection of templates from the ones\n", - "available in GreenGuard.\n", + "available in Draco.\n", "\n", - "For this, we can use the `greenguard.get_pipelines` function, which will\n", + "For this, we can use the `draco.get_pipelines` function, which will\n", "return us the list of all the available MLBlocks pipelines found in the\n", - "GreenGuard system." + "Draco system." ] }, { @@ -444,7 +444,7 @@ } ], "source": [ - "from greenguard import get_pipelines\n", + "from draco import get_pipelines\n", "\n", "get_pipelines()" ] @@ -505,20 +505,20 @@ { "data": { "text/plain": [ - "{'unstacked.unstacked_normalize_dfs_xgb_classifier': '/GreenGuard/greenguard/pipelines/unstacked/unstacked_normalize_dfs_xgb_classifier.json',\n", - " 'unstacked.unstacked_dfs_xgb_classifier': '/GreenGuard/greenguard/pipelines/unstacked/unstacked_dfs_xgb_classifier.json',\n", - " 'classes.unstack_dfs_xgb_classifier': '/GreenGuard/greenguard/pipelines/classes/unstack_dfs_xgb_classifier.json',\n", - " 'classes.normalize_dfs_xgb_classifier': '/GreenGuard/greenguard/pipelines/classes/normalize_dfs_xgb_classifier.json',\n", - " 'classes.unstack_normalize_dfs_xgb_classifier': '/GreenGuard/greenguard/pipelines/classes/unstack_normalize_dfs_xgb_classifier.json',\n", - " 'disabled.resample_normalize_dfs_xgb_classifier': '/GreenGuard/greenguard/pipelines/disabled/resample_normalize_dfs_xgb_classifier.json',\n", - " 'disabled.resample_unstack_normalize_dfs_xgb_classifier': '/GreenGuard/greenguard/pipelines/disabled/resample_unstack_normalize_dfs_xgb_classifier.json',\n", - " 'disabled.normalize_dfs_xgb_classifier': '/GreenGuard/greenguard/pipelines/disabled/normalize_dfs_xgb_classifier.json',\n", - " 'disabled.resample_dfs_xgb_classifier': '/GreenGuard/greenguard/pipelines/disabled/resample_dfs_xgb_classifier.json',\n", - " 'disabled.resample_unstack_dfs_xgb_classifier': '/GreenGuard/greenguard/pipelines/disabled/resample_unstack_dfs_xgb_classifier.json',\n", - " 'disabled.dfs_xgb_classifier': '/GreenGuard/greenguard/pipelines/disabled/dfs_xgb_classifier.json',\n", - " 'probability.unstack_dfs_xgb_classifier': '/GreenGuard/greenguard/pipelines/probability/unstack_dfs_xgb_classifier.json',\n", - " 'probability.normalize_dfs_xgb_classifier': '/GreenGuard/greenguard/pipelines/probability/normalize_dfs_xgb_classifier.json',\n", - " 'probability.unstack_normalize_dfs_xgb_classifier': '/GreenGuard/greenguard/pipelines/probability/unstack_normalize_dfs_xgb_classifier.json'}" + "{'unstacked.unstacked_normalize_dfs_xgb_classifier': '/Draco/draco/pipelines/unstacked/unstacked_normalize_dfs_xgb_classifier.json',\n", + " 'unstacked.unstacked_dfs_xgb_classifier': '/Draco/draco/pipelines/unstacked/unstacked_dfs_xgb_classifier.json',\n", + " 'classes.unstack_dfs_xgb_classifier': '/Draco/draco/pipelines/classes/unstack_dfs_xgb_classifier.json',\n", + " 'classes.normalize_dfs_xgb_classifier': '/Draco/draco/pipelines/classes/normalize_dfs_xgb_classifier.json',\n", + " 'classes.unstack_normalize_dfs_xgb_classifier': '/Draco/draco/pipelines/classes/unstack_normalize_dfs_xgb_classifier.json',\n", + " 'disabled.resample_normalize_dfs_xgb_classifier': '/Draco/draco/pipelines/disabled/resample_normalize_dfs_xgb_classifier.json',\n", + " 'disabled.resample_unstack_normalize_dfs_xgb_classifier': '/Draco/draco/pipelines/disabled/resample_unstack_normalize_dfs_xgb_classifier.json',\n", + " 'disabled.normalize_dfs_xgb_classifier': 
'/Draco/draco/pipelines/disabled/normalize_dfs_xgb_classifier.json',\n", + " 'disabled.resample_dfs_xgb_classifier': '/Draco/draco/pipelines/disabled/resample_dfs_xgb_classifier.json',\n", + " 'disabled.resample_unstack_dfs_xgb_classifier': '/Draco/draco/pipelines/disabled/resample_unstack_dfs_xgb_classifier.json',\n", + " 'disabled.dfs_xgb_classifier': '/Draco/draco/pipelines/disabled/dfs_xgb_classifier.json',\n", + " 'probability.unstack_dfs_xgb_classifier': '/Draco/draco/pipelines/probability/unstack_dfs_xgb_classifier.json',\n", + " 'probability.normalize_dfs_xgb_classifier': '/Draco/draco/pipelines/probability/normalize_dfs_xgb_classifier.json',\n", + " 'probability.unstack_normalize_dfs_xgb_classifier': '/Draco/draco/pipelines/probability/unstack_normalize_dfs_xgb_classifier.json'}" ] }, "execution_count": 12, @@ -568,7 +568,7 @@ "source": [ "## 4. Finding the best Pipeline\n", "\n", - "Once we have loaded the data, we create a **GreenGuardPipeline** instance by passing:\n", + "Once we have loaded the data, we create a **DracoPipeline** instance by passing:\n", "\n", "* `templates (string or list)`: the name of a template, the path to a template json file or\n", "a list that can combine both of them.\n", @@ -589,9 +589,9 @@ "metadata": {}, "outputs": [], "source": [ - "from greenguard.pipeline import GreenGuardPipeline\n", + "from draco.pipeline import DracoPipeline\n", "\n", - "pipeline = GreenGuardPipeline(templates, metric='f1', cv_splits=3)" + "pipeline = DracoPipeline(templates, metric='f1', cv_splits=3)" ] }, { @@ -632,7 +632,7 @@ "output_type": "stream", "text": [ "INFO:btb.session:Obtaining default configuration for classes.unstack_normalize_dfs_xgb_classifier\n", - "INFO:greenguard.pipeline:New configuration found:\n", + "INFO:draco.pipeline:New configuration found:\n", " Template: classes.unstack_normalize_dfs_xgb_classifier \n", " Hyperparameters: \n", " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 0\n", @@ -817,7 +817,7 @@ "text": [ "INFO:btb.session:Generating new proposal configuration for classes.unstack_normalize_dfs_xgb_classifier\n", "INFO:btb.session:Generating new proposal configuration for classes.unstack_normalize_dfs_xgb_classifier\n", - "INFO:greenguard.pipeline:New configuration found:\n", + "INFO:draco.pipeline:New configuration found:\n", " Template: classes.unstack_normalize_dfs_xgb_classifier \n", " Hyperparameters: \n", " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 97\n", @@ -986,7 +986,7 @@ "will probably want to save a fitted instance and load it later to analyze new signals\n", "instead of fitting pipelines over and over again.\n", "\n", - "This can be done by using the `save` and `load` methods from the `GreenGuardPipeline`.\n", + "This can be done by using the `save` and `load` methods from the `DracoPipeline`.\n", "\n", "In order to save an instance, call its `save` method passing it the path and filename\n", "where the model should be saved." 
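A minimal sketch of the save/load round trip described above; the file path is arbitrary, and the `test` and `readings` variables are assumed to be the ones prepared earlier in the tutorial:

```python3
# Persist the fitted pipeline to disk...
path = 'my_pipeline.pkl'
pipeline.save(path)

# ...and load it back later as a new DracoPipeline instance, ready to predict.
new_pipeline = DracoPipeline.load(path)
predictions = new_pipeline.predict(test, readings)
```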
@@ -1007,8 +1007,8 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Once the pipeline is saved, it can be loaded back as a new `GreenGuardPipeline` by using the\n",
-    "`GreenGuardPipeline.load` method:"
+    "Once the pipeline is saved, it can be loaded back as a new `DracoPipeline` by using the\n",
+    "`DracoPipeline.load` method:"
    ]
   },
   {
@@ -1017,7 +1017,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "new_pipeline = GreenGuardPipeline.load(path)"
+    "new_pipeline = DracoPipeline.load(path)"
    ]
   },
   {
diff --git a/tutorials/02_Extract_Readings.ipynb b/tutorials/02_Extract_Readings.ipynb
index a454648..1713a07 100644
--- a/tutorials/02_Extract_Readings.ipynb
+++ b/tutorials/02_Extract_Readings.ipynb
@@ -10,7 +10,7 @@
    "from a folder that contains readings in the raw CSV format.\n",
    "\n",
    "The Raw CSV format is briefly explained below, but more details can be found in [the documentation site](\n",
-    "https://signals-dev.github.io/GreenGuard/advanced_usage/csv.html)\n",
+    "https://sintel-dev.github.io/Draco/advanced_usage/csv.html)\n",
    "\n",
    "During the next steps we will:\n",
    "\n",
@@ -29,7 +29,7 @@
    "## 0. Setup the logging\n",
    "\n",
    "This step sets up logging in our environment to increase our visibility over\n",
-    "the steps that GreenGuard performs."
+    "the steps that Draco performs."
   ]
  },
  {
@@ -70,23 +70,23 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "INFO:greenguard.demo:Generating file readings/T001/2013-01.csv\n",
-      "INFO:greenguard.demo:Generating file readings/T001/2013-02.csv\n",
-      "INFO:greenguard.demo:Generating file readings/T001/2013-03.csv\n",
-      "INFO:greenguard.demo:Generating file readings/T001/2013-04.csv\n",
-      "INFO:greenguard.demo:Generating file readings/T001/2013-05.csv\n",
-      "INFO:greenguard.demo:Generating file readings/T001/2013-06.csv\n",
-      "INFO:greenguard.demo:Generating file readings/T001/2013-07.csv\n",
-      "INFO:greenguard.demo:Generating file readings/T001/2013-08.csv\n",
-      "INFO:greenguard.demo:Generating file readings/T001/2013-09.csv\n",
-      "INFO:greenguard.demo:Generating file readings/T001/2013-10.csv\n",
-      "INFO:greenguard.demo:Generating file readings/T001/2013-11.csv\n",
-      "INFO:greenguard.demo:Generating file readings/T001/2013-12.csv\n"
+      "INFO:draco.demo:Generating file readings/T001/2013-01.csv\n",
+      "INFO:draco.demo:Generating file readings/T001/2013-02.csv\n",
+      "INFO:draco.demo:Generating file readings/T001/2013-03.csv\n",
+      "INFO:draco.demo:Generating file readings/T001/2013-04.csv\n",
+      "INFO:draco.demo:Generating file readings/T001/2013-05.csv\n",
+      "INFO:draco.demo:Generating file readings/T001/2013-06.csv\n",
+      "INFO:draco.demo:Generating file readings/T001/2013-07.csv\n",
+      "INFO:draco.demo:Generating file readings/T001/2013-08.csv\n",
+      "INFO:draco.demo:Generating file readings/T001/2013-09.csv\n",
+      "INFO:draco.demo:Generating file readings/T001/2013-10.csv\n",
+      "INFO:draco.demo:Generating file readings/T001/2013-11.csv\n",
+      "INFO:draco.demo:Generating file readings/T001/2013-12.csv\n"
      ]
     }
    ],
    "source": [
-    "from greenguard.demo import generate_raw_readings\n",
+    "from draco.demo import generate_raw_readings\n",
    "\n",
    "target_times = generate_raw_readings('readings')"
   ]
@@ -393,7 +393,7 @@
    "them into memory all at once.\n",
    "\n",
    "In order to load them in an efficient way so that we can use them to solve Machine Learning\n",
-    "problems, GeenGuard provides the `greenguard.loaders.CVSLoader` class.\n",
+    "problems, Draco provides the `draco.loaders.CSVLoader` class.\n",
    "\n",
    "This class is prepared
to, given a target times table, explore a collection of raw readings\n", "and extract only the information needed to solve that particular problem.\n", @@ -419,7 +419,7 @@ "metadata": {}, "outputs": [], "source": [ - "from greenguard.loaders import CSVLoader\n", + "from draco.loaders import CSVLoader\n", "\n", "readings_path = 'readings'\n", "\n", @@ -453,9 +453,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:greenguard.loaders.csv:Loaded 1306052 readings from turbine T001\n", - "INFO:greenguard.loaders.csv:Loaded 1306052 turbine readings\n", - "INFO:greenguard.targets:Dropped 0 targets without enough data. Final target_times size: 353\n" + "INFO:draco.loaders.csv:Loaded 1306052 readings from turbine T001\n", + "INFO:draco.loaders.csv:Loaded 1306052 turbine readings\n", + "INFO:draco.targets:Dropped 0 targets without enough data. Final target_times size: 353\n" ] } ], @@ -646,9 +646,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:greenguard.loaders.csv:Loaded 1309796 readings from turbine T001\n", - "INFO:greenguard.loaders.csv:Loaded 1309796 turbine readings\n", - "INFO:greenguard.targets:Dropped 28 targets without enough data. Final target_times size: 325\n" + "INFO:draco.loaders.csv:Loaded 1309796 readings from turbine T001\n", + "INFO:draco.loaders.csv:Loaded 1309796 turbine readings\n", + "INFO:draco.targets:Dropped 28 targets without enough data. Final target_times size: 325\n" ] } ], @@ -763,33 +763,33 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:81749 readings reduced to 3432\n", - "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:110938 readings reduced to 4680\n", - "INFO:greenguard.loaders.csv:112118 readings reduced to 4680\n", - "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:111862 readings reduced to 4680\n", - "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:114400 readings reduced to 4836\n", - "INFO:greenguard.loaders.csv:105321 readings reduced to 4550\n", - "INFO:greenguard.loaders.csv:108371 readings reduced to 4680\n", - "INFO:greenguard.loaders.csv:115615 readings reduced to 4836\n", - "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:115647 readings reduced to 4836\n", - "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:103319 readings reduced to 4368\n", - "INFO:greenguard.loaders.csv:115979 readings reduced to 4836\n", - "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:114477 readings reduced to 4836\n", - "INFO:greenguard.loaders.csv:Loaded 55250 readings from turbine T001\n", - "INFO:greenguard.loaders.csv:Loaded 55250 turbine readings\n", - "INFO:greenguard.targets:Dropped 12 targets without enough data. 
Final target_times size: 341\n" + "INFO:draco.loaders.csv:Resampling: 4h - mean\n", + "INFO:draco.loaders.csv:81749 readings reduced to 3432\n", + "INFO:draco.loaders.csv:Resampling: 4h - mean\n", + "INFO:draco.loaders.csv:Resampling: 4h - mean\n", + "INFO:draco.loaders.csv:Resampling: 4h - mean\n", + "INFO:draco.loaders.csv:110938 readings reduced to 4680\n", + "INFO:draco.loaders.csv:112118 readings reduced to 4680\n", + "INFO:draco.loaders.csv:Resampling: 4h - mean\n", + "INFO:draco.loaders.csv:Resampling: 4h - mean\n", + "INFO:draco.loaders.csv:111862 readings reduced to 4680\n", + "INFO:draco.loaders.csv:Resampling: 4h - mean\n", + "INFO:draco.loaders.csv:Resampling: 4h - mean\n", + "INFO:draco.loaders.csv:114400 readings reduced to 4836\n", + "INFO:draco.loaders.csv:105321 readings reduced to 4550\n", + "INFO:draco.loaders.csv:108371 readings reduced to 4680\n", + "INFO:draco.loaders.csv:115615 readings reduced to 4836\n", + "INFO:draco.loaders.csv:Resampling: 4h - mean\n", + "INFO:draco.loaders.csv:115647 readings reduced to 4836\n", + "INFO:draco.loaders.csv:Resampling: 4h - mean\n", + "INFO:draco.loaders.csv:Resampling: 4h - mean\n", + "INFO:draco.loaders.csv:103319 readings reduced to 4368\n", + "INFO:draco.loaders.csv:115979 readings reduced to 4836\n", + "INFO:draco.loaders.csv:Resampling: 4h - mean\n", + "INFO:draco.loaders.csv:114477 readings reduced to 4836\n", + "INFO:draco.loaders.csv:Loaded 55250 readings from turbine T001\n", + "INFO:draco.loaders.csv:Loaded 55250 turbine readings\n", + "INFO:draco.targets:Dropped 12 targets without enough data. Final target_times size: 341\n" ] } ], @@ -940,7 +940,7 @@ "source": [ "## 4. Unstacking\n", "\n", - "Some of the pipelines included in **GreenGuard** expect a slightly different input format\n", + "Some of the pipelines included in **Draco** expect a slightly different input format\n", "where the data has been unstacked by `signal_id`, putting the values of each signal in a\n", "different column instead of having all of them in a single one.\n", "\n", @@ -959,33 +959,33 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:108371 readings reduced to 4680\n", - "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:115647 readings reduced to 4836\n", - "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:103319 readings reduced to 4368\n", - "INFO:greenguard.loaders.csv:115615 readings reduced to 4836\n", - "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:114400 readings reduced to 4836\n", - "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:114477 readings reduced to 4836\n", - "INFO:greenguard.loaders.csv:115979 readings reduced to 4836\n", - "INFO:greenguard.loaders.csv:111862 readings reduced to 4680\n", - "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:81749 readings reduced to 3432\n", - "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:105321 readings reduced to 4550\n", - "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:Resampling: 4h - mean\n", - "INFO:greenguard.loaders.csv:112118 readings reduced to 4680\n", - 
"INFO:greenguard.loaders.csv:110938 readings reduced to 4680\n", - "INFO:greenguard.loaders.csv:Loaded 2125 readings from turbine T001\n", - "INFO:greenguard.loaders.csv:Loaded 2125 turbine readings\n", - "INFO:greenguard.targets:Dropped 12 targets without enough data. Final target_times size: 341\n" + "INFO:draco.loaders.csv:Resampling: 4h - mean\n", + "INFO:draco.loaders.csv:Resampling: 4h - mean\n", + "INFO:draco.loaders.csv:108371 readings reduced to 4680\n", + "INFO:draco.loaders.csv:Resampling: 4h - mean\n", + "INFO:draco.loaders.csv:Resampling: 4h - mean\n", + "INFO:draco.loaders.csv:Resampling: 4h - mean\n", + "INFO:draco.loaders.csv:115647 readings reduced to 4836\n", + "INFO:draco.loaders.csv:Resampling: 4h - mean\n", + "INFO:draco.loaders.csv:103319 readings reduced to 4368\n", + "INFO:draco.loaders.csv:115615 readings reduced to 4836\n", + "INFO:draco.loaders.csv:Resampling: 4h - mean\n", + "INFO:draco.loaders.csv:114400 readings reduced to 4836\n", + "INFO:draco.loaders.csv:Resampling: 4h - mean\n", + "INFO:draco.loaders.csv:114477 readings reduced to 4836\n", + "INFO:draco.loaders.csv:115979 readings reduced to 4836\n", + "INFO:draco.loaders.csv:111862 readings reduced to 4680\n", + "INFO:draco.loaders.csv:Resampling: 4h - mean\n", + "INFO:draco.loaders.csv:81749 readings reduced to 3432\n", + "INFO:draco.loaders.csv:Resampling: 4h - mean\n", + "INFO:draco.loaders.csv:105321 readings reduced to 4550\n", + "INFO:draco.loaders.csv:Resampling: 4h - mean\n", + "INFO:draco.loaders.csv:Resampling: 4h - mean\n", + "INFO:draco.loaders.csv:112118 readings reduced to 4680\n", + "INFO:draco.loaders.csv:110938 readings reduced to 4680\n", + "INFO:draco.loaders.csv:Loaded 2125 readings from turbine T001\n", + "INFO:draco.loaders.csv:Loaded 2125 turbine readings\n", + "INFO:draco.targets:Dropped 12 targets without enough data. Final target_times size: 341\n" ] } ], diff --git a/tutorials/03_Benchmarking.ipynb b/tutorials/03_Benchmarking.ipynb index 56e8701..c7bce62 100644 --- a/tutorials/03_Benchmarking.ipynb +++ b/tutorials/03_Benchmarking.ipynb @@ -14,7 +14,7 @@ "## 0. Setup the logging\n", "\n", "This step sets up logging in our environment to increase our visibility over\n", - "the steps that GreenGuard performs." + "the steps that Draco performs." ] }, { @@ -27,7 +27,7 @@ "\n", "logging.basicConfig(level=logging.INFO)\n", "logging.getLogger().setLevel(level=logging.ERROR)\n", - "logging.getLogger('greenguard').setLevel(level=logging.INFO)\n", + "logging.getLogger('draco').setLevel(level=logging.INFO)\n", "\n", "import warnings\n", "warnings.simplefilter(\"ignore\")" @@ -40,12 +40,12 @@ "\n", "## Running the Benchmarking\n", "\n", - "The user API for the GreenGuard Benchmarking is the `greenguard.benchmark.evaluate_templates` function.\n", + "The user API for the Draco Benchmarking is the `draco.benchmark.evaluate_templates` function.\n", "\n", "The `evaluate_templates` function accepts the following arguments:\n", "* `templates (list)`: List of templates to try.\n", "* `window_size_rule (list)`: List of tupples (int, str or Timedelta object).\n", - "* `metric (function or str)`: Metric to use. If an ``str`` is give it must be one of the metrics defined in the `greenguard.metrics.METRICS` dictionary.\n", + "* `metric (function or str)`: Metric to use. 
If an ``str`` is give it must be one of the metrics defined in the `draco.metrics.METRICS` dictionary.\n", "* `tuning_iterations (int)`: Number of iterations to be used.\n", "* `init_params (dict)`: Initialization parameters for the pipelines.\n", "* `target_times (DataFrame)`: Contains the specefication problem that we are solving, which has three columns:\n", @@ -100,45 +100,45 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:greenguard.benchmark:Evaluating template probability.unstack_lstm_timeseries_classifier on problem None (1d, 1h)\n", - "INFO:greenguard.pipeline:New configuration found:\n", + "INFO:draco.benchmark:Evaluating template probability.unstack_lstm_timeseries_classifier on problem None (1d, 1h)\n", + "INFO:draco.pipeline:New configuration found:\n", " Template: probability.unstack_lstm_timeseries_classifier \n", " Hyperparameters: \n", " ('sklearn.impute.SimpleImputer#1', 'strategy'): mean\n", " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'lstm_1_units'): 80\n", " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.3\n", " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dense_1_units'): 80\n", - "INFO:greenguard.pipeline:New configuration found:\n", + "INFO:draco.pipeline:New configuration found:\n", " Template: probability.unstack_lstm_timeseries_classifier \n", " Hyperparameters: \n", " ('sklearn.impute.SimpleImputer#1', 'strategy'): constant\n", " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'lstm_1_units'): 287\n", " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.565737233372491\n", " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dense_1_units'): 145\n", - "INFO:greenguard.pipeline:New configuration found:\n", + "INFO:draco.pipeline:New configuration found:\n", " Template: probability.unstack_lstm_timeseries_classifier \n", " Hyperparameters: \n", " ('sklearn.impute.SimpleImputer#1', 'strategy'): constant\n", " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'lstm_1_units'): 269\n", " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.5973752345055594\n", " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dense_1_units'): 412\n", - "INFO:greenguard.benchmark:Evaluating template probability.unstack_lstm_timeseries_classifier on problem None (2d, 2h)\n", - "INFO:greenguard.pipeline:New configuration found:\n", + "INFO:draco.benchmark:Evaluating template probability.unstack_lstm_timeseries_classifier on problem None (2d, 2h)\n", + "INFO:draco.pipeline:New configuration found:\n", " Template: probability.unstack_lstm_timeseries_classifier \n", " Hyperparameters: \n", " ('sklearn.impute.SimpleImputer#1', 'strategy'): mean\n", " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'lstm_1_units'): 80\n", " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.3\n", " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dense_1_units'): 80\n", - "INFO:greenguard.pipeline:New configuration found:\n", + "INFO:draco.pipeline:New configuration found:\n", " Template: probability.unstack_lstm_timeseries_classifier \n", " Hyperparameters: \n", " ('sklearn.impute.SimpleImputer#1', 'strategy'): mean\n", " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'lstm_1_units'): 114\n", " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.016427744327526084\n", " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dense_1_units'): 224\n", - "INFO:greenguard.benchmark:Evaluating template probability.normalize_dfs_xgb_classifier on problem None (1d, 1h)\n", - "INFO:greenguard.pipeline:New 
configuration found:\n", + "INFO:draco.benchmark:Evaluating template probability.normalize_dfs_xgb_classifier on problem None (1d, 1h)\n", + "INFO:draco.pipeline:New configuration found:\n", " Template: probability.normalize_dfs_xgb_classifier \n", " Hyperparameters: \n", " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 0\n", @@ -147,7 +147,7 @@ " ('xgboost.XGBClassifier:probabilities#1', 'learning_rate'): 0.1\n", " ('xgboost.XGBClassifier:probabilities#1', 'gamma'): 0.0\n", " ('xgboost.XGBClassifier:probabilities#1', 'min_child_weight'): 1\n", - "INFO:greenguard.pipeline:New configuration found:\n", + "INFO:draco.pipeline:New configuration found:\n", " Template: probability.normalize_dfs_xgb_classifier \n", " Hyperparameters: \n", " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 11\n", @@ -156,7 +156,7 @@ " ('xgboost.XGBClassifier:probabilities#1', 'learning_rate'): 0.554989010368875\n", " ('xgboost.XGBClassifier:probabilities#1', 'gamma'): 0.909957492053926\n", " ('xgboost.XGBClassifier:probabilities#1', 'min_child_weight'): 7\n", - "INFO:greenguard.pipeline:New configuration found:\n", + "INFO:draco.pipeline:New configuration found:\n", " Template: probability.normalize_dfs_xgb_classifier \n", " Hyperparameters: \n", " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 61\n", @@ -165,8 +165,8 @@ " ('xgboost.XGBClassifier:probabilities#1', 'learning_rate'): 0.6840927016151666\n", " ('xgboost.XGBClassifier:probabilities#1', 'gamma'): 0.5480298094360865\n", " ('xgboost.XGBClassifier:probabilities#1', 'min_child_weight'): 6\n", - "INFO:greenguard.benchmark:Evaluating template probability.normalize_dfs_xgb_classifier on problem None (2d, 2h)\n", - "INFO:greenguard.pipeline:New configuration found:\n", + "INFO:draco.benchmark:Evaluating template probability.normalize_dfs_xgb_classifier on problem None (2d, 2h)\n", + "INFO:draco.pipeline:New configuration found:\n", " Template: probability.normalize_dfs_xgb_classifier \n", " Hyperparameters: \n", " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 0\n", @@ -175,7 +175,7 @@ " ('xgboost.XGBClassifier:probabilities#1', 'learning_rate'): 0.1\n", " ('xgboost.XGBClassifier:probabilities#1', 'gamma'): 0.0\n", " ('xgboost.XGBClassifier:probabilities#1', 'min_child_weight'): 1\n", - "INFO:greenguard.pipeline:New configuration found:\n", + "INFO:draco.pipeline:New configuration found:\n", " Template: probability.normalize_dfs_xgb_classifier \n", " Hyperparameters: \n", " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 99\n", @@ -188,7 +188,7 @@ } ], "source": [ - "from greenguard.benchmark import evaluate_templates\n", + "from draco.benchmark import evaluate_templates\n", "\n", "results = evaluate_templates(\n", " templates=templates,\n", diff --git a/tutorials/pipelines/normalize_dfs_xgb_classifier.ipynb b/tutorials/pipelines/normalize_dfs_xgb_classifier.ipynb index 5fc510e..ca00d58 100644 --- a/tutorials/pipelines/normalize_dfs_xgb_classifier.ipynb +++ b/tutorials/pipelines/normalize_dfs_xgb_classifier.ipynb @@ -13,7 +13,7 @@ "metadata": {}, "outputs": [], "source": [ - "from greenguard.demo import load_demo\n", + "from draco.demo import load_demo\n", "\n", "target_times, readings = load_demo()" ] @@ -33,9 +33,9 @@ "metadata": {}, "outputs": [], "source": [ - "from greenguard.pipeline import GreenGuardPipeline\n", + "from draco.pipeline import DracoPipeline\n", "\n", - "pipeline = 
GreenGuardPipeline(pipeline_name)" + "pipeline = DracoPipeline(pipeline_name)" ] }, { @@ -256,7 +256,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Data Preparation (part of GreenGuard Pipeline)\n", + "## Data Preparation (part of Draco Pipeline)\n", "\n", "* Input: target_times, readings, turbines\n", "* Output: X, y, readings, turbines\n", diff --git a/tutorials/pipelines/unstack_double_lstm_timeseries_classifier.ipynb b/tutorials/pipelines/unstack_double_lstm_timeseries_classifier.ipynb index f44377b..f539e89 100644 --- a/tutorials/pipelines/unstack_double_lstm_timeseries_classifier.ipynb +++ b/tutorials/pipelines/unstack_double_lstm_timeseries_classifier.ipynb @@ -13,7 +13,7 @@ "metadata": {}, "outputs": [], "source": [ - "from greenguard.demo import load_demo\n", + "from draco.demo import load_demo\n", "\n", "target_times, readings = load_demo()" ] @@ -33,9 +33,9 @@ "metadata": {}, "outputs": [], "source": [ - "from greenguard.pipeline import GreenGuardPipeline\n", + "from draco.pipeline import DracoPipeline\n", "\n", - "pipeline = GreenGuardPipeline(pipeline_name)" + "pipeline = DracoPipeline(pipeline_name)" ] }, { @@ -260,7 +260,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Data Preparation (part of GreenGuard Pipeline)\n", + "## Data Preparation (part of Draco Pipeline)\n", "\n", "* Input: target_times, readings, turbines\n", "* Output: X, y, readings, turbines\n", diff --git a/tutorials/pipelines/unstack_lstm_timeseries_classifier.ipynb b/tutorials/pipelines/unstack_lstm_timeseries_classifier.ipynb index ec68b0e..1a10480 100644 --- a/tutorials/pipelines/unstack_lstm_timeseries_classifier.ipynb +++ b/tutorials/pipelines/unstack_lstm_timeseries_classifier.ipynb @@ -13,7 +13,7 @@ "metadata": {}, "outputs": [], "source": [ - "from greenguard.demo import load_demo\n", + "from draco.demo import load_demo\n", "\n", "target_times, readings = load_demo()" ] @@ -33,9 +33,9 @@ "metadata": {}, "outputs": [], "source": [ - "from greenguard.pipeline import GreenGuardPipeline\n", + "from draco.pipeline import DracoPipeline\n", "\n", - "pipeline = GreenGuardPipeline(pipeline_name)" + "pipeline = DracoPipeline(pipeline_name)" ] }, { @@ -260,7 +260,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Data Preparation (part of GreenGuard Pipeline)\n", + "## Data Preparation (part of Draco Pipeline)\n", "\n", "* Input: target_times, readings, turbines\n", "* Output: X, y, readings, turbines\n", diff --git a/tutorials/pipelines/unstack_normalize_dfs_xgb_classifier.ipynb b/tutorials/pipelines/unstack_normalize_dfs_xgb_classifier.ipynb index 8fc6c8b..84530a2 100644 --- a/tutorials/pipelines/unstack_normalize_dfs_xgb_classifier.ipynb +++ b/tutorials/pipelines/unstack_normalize_dfs_xgb_classifier.ipynb @@ -13,7 +13,7 @@ "metadata": {}, "outputs": [], "source": [ - "from greenguard.demo import load_demo\n", + "from draco.demo import load_demo\n", "\n", "target_times, readings = load_demo()" ] @@ -33,9 +33,9 @@ "metadata": {}, "outputs": [], "source": [ - "from greenguard.pipeline import GreenGuardPipeline\n", + "from draco.pipeline import DracoPipeline\n", "\n", - "pipeline = GreenGuardPipeline(pipeline_name)" + "pipeline = DracoPipeline(pipeline_name)" ] }, { @@ -256,7 +256,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Data Preparation (part of GreenGuard Pipeline)\n", + "## Data Preparation (part of Draco Pipeline)\n", "\n", "* Input: target_times, readings, turbines\n", "* Output: X, y, readings, turbines\n", From 
d211141373e51d63a23bbbc8d833f5ccd28b7eb5 Mon Sep 17 00:00:00 2001 From: Sarah Alnegheimish <40212131+sarahmish@users.noreply.github.com> Date: Sun, 19 Dec 2021 04:48:36 +0300 Subject: [PATCH 144/171] Remove support python 3.6 (#62) --- .github/workflows/tests.yml | 10 +++++----- setup.py | 3 +-- tox.ini | 4 +--- 3 files changed, 7 insertions(+), 10 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 97dbb0e..5659465 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -11,7 +11,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: [3.6, 3.7, 3.8] + python-version: [3.7, 3.8] os: [ubuntu-latest] steps: - uses: actions/checkout@v1 @@ -30,7 +30,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: [3.6, 3.7, 3.8] + python-version: [3.7, 3.8] os: [ubuntu-latest] steps: - uses: actions/checkout@v1 @@ -52,7 +52,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: [3.6, 3.7, 3.8] + python-version: [3.7, 3.8] os: [ubuntu-latest, macos-latest] steps: - uses: actions/checkout@v1 @@ -71,7 +71,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: [3.6, 3.7, 3.8] + python-version: [3.7, 3.8] os: [ubuntu-latest] steps: - uses: actions/checkout@v1 @@ -90,7 +90,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: [3.6, 3.7, 3.8] + python-version: [3.7, 3.8] os: [ubuntu-latest] steps: - uses: actions/checkout@v1 diff --git a/setup.py b/setup.py index 1305aaf..60c095f 100644 --- a/setup.py +++ b/setup.py @@ -87,7 +87,6 @@ 'License :: OSI Approved :: MIT License', 'Natural Language :: English', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', ], @@ -110,7 +109,7 @@ long_description_content_type='text/markdown', name='draco-ml', packages=find_packages(include=['draco', 'draco.*']), - python_requires='>=3.6,<3.9', + python_requires='>=3.7,<3.9', setup_requires=setup_requires, test_suite='tests', tests_require=tests_require, diff --git a/tox.ini b/tox.ini index 0068931..88295ee 100644 --- a/tox.ini +++ b/tox.ini @@ -5,19 +5,17 @@ commands = /usr/bin/env make docs [tox] -envlist = py3{6,7,8}-{lint,readme,unit,minimum} +envlist = py3{7,8}-{lint,readme,unit,minimum} [travis] python = 3.8: py38-lint, py38-readme, py38-unit, py38-minimum, py38-tutorials 3.7: py37-lint, py37-readme, py37-unit, py37-minimum, py37-tutorials - 3.6: py36-lint, py36-readme, py36-unit, py36-minimum, py36-tutorials [gh-actions] python = 3.8: py38-lint, py38-readme, py38-unit, py38-minimum, py38-tutorials 3.7: py37-lint, py37-readme, py37-unit, py37-minimum, py37-tutorials - 3.6: py36-lint, py36-readme, py36-unit, py36-minimum, py36-tutorials [testenv] passenv = CI TRAVIS TRAVIS_* From 1ba40d2518f75f5484707bcdf0322bce56454fad Mon Sep 17 00:00:00 2001 From: sarahmish Date: Sat, 1 Jan 2022 11:50:51 +0300 Subject: [PATCH 145/171] =?UTF-8?q?Bump=20version:=200.0.1.dev0=20?= =?UTF-8?q?=E2=86=92=200.0.1.dev1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- draco/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/draco/__init__.py b/draco/__init__.py index 885fac1..6abe781 100644 --- a/draco/__init__.py +++ b/draco/__init__.py @@ -4,7 +4,7 @@ __author__ = """MIT Data To AI Lab""" __email__ = 'dailabmit@gmail.com' -__version__ = '0.0.1.dev0' +__version__ = '0.0.1.dev1' 
import os diff --git a/setup.cfg b/setup.cfg index d2829c0..1ac6a7a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.0.1.dev0 +current_version = 0.0.1.dev1 commit = True tag = True parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? diff --git a/setup.py b/setup.py index 60c095f..e7502d8 100644 --- a/setup.py +++ b/setup.py @@ -114,6 +114,6 @@ test_suite='tests', tests_require=tests_require, url='https://github.com/sintel-dev/Draco', - version='0.0.1.dev0', + version='0.0.1.dev1', zip_safe=False, ) From ea3499b4b92c2de8eb5d075520d261b2a106c8eb Mon Sep 17 00:00:00 2001 From: sarahmish Date: Sat, 1 Jan 2022 12:15:59 +0300 Subject: [PATCH 146/171] prepare release notes --- HISTORY.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/HISTORY.md b/HISTORY.md index e656d1a..c4a54ff 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,11 @@ # History +## 0.1.0 - 2021-01-01 + +Rename ``GreenGuard`` to ``Draco``. + +* First release on ``draco-ml`` PyPI + ## 0.3.0 - 2021-01-22 This release increases the supported version of python to `3.8` and also includes changes From cf88c3b703640674022f79b5003036deea32edbf Mon Sep 17 00:00:00 2001 From: sarahmish Date: Sat, 1 Jan 2022 12:16:20 +0300 Subject: [PATCH 147/171] =?UTF-8?q?Bump=20version:=200.0.1.dev1=20?= =?UTF-8?q?=E2=86=92=200.1.0.dev0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- draco/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/draco/__init__.py b/draco/__init__.py index 6abe781..4269cd2 100644 --- a/draco/__init__.py +++ b/draco/__init__.py @@ -4,7 +4,7 @@ __author__ = """MIT Data To AI Lab""" __email__ = 'dailabmit@gmail.com' -__version__ = '0.0.1.dev1' +__version__ = '0.1.0.dev0' import os diff --git a/setup.cfg b/setup.cfg index 1ac6a7a..792c650 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.0.1.dev1 +current_version = 0.1.0.dev0 commit = True tag = True parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? diff --git a/setup.py b/setup.py index e7502d8..cdfd471 100644 --- a/setup.py +++ b/setup.py @@ -114,6 +114,6 @@ test_suite='tests', tests_require=tests_require, url='https://github.com/sintel-dev/Draco', - version='0.0.1.dev1', + version='0.1.0.dev0', zip_safe=False, ) From d904b10d884163097cb90f3f10884ff4e619f7e8 Mon Sep 17 00:00:00 2001 From: sarahmish Date: Sat, 1 Jan 2022 12:18:14 +0300 Subject: [PATCH 148/171] =?UTF-8?q?Bump=20version:=200.1.0.dev0=20?= =?UTF-8?q?=E2=86=92=200.1.0.dev1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- draco/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/draco/__init__.py b/draco/__init__.py index 4269cd2..3caad34 100644 --- a/draco/__init__.py +++ b/draco/__init__.py @@ -4,7 +4,7 @@ __author__ = """MIT Data To AI Lab""" __email__ = 'dailabmit@gmail.com' -__version__ = '0.1.0.dev0' +__version__ = '0.1.0.dev1' import os diff --git a/setup.cfg b/setup.cfg index 792c650..30db171 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.1.0.dev0 +current_version = 0.1.0.dev1 commit = True tag = True parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? 
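The bumpversion `parse` option in these setup.cfg hunks relies on named groups whose angle-bracketed names are not legible here. A minimal sketch, assuming the conventional `major`/`minor`/`patch`/`release`/`candidate` names, of how such a pattern decomposes the version strings bumped in this series:

    import re

    # Assumed reconstruction of the parse pattern above; the group names are
    # inferred from the version scheme being bumped and are not authoritative.
    PARSE = re.compile(
        r'(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)'
        r'(\.(?P<release>[a-z]+)(?P<candidate>\d+))?'
    )

    print(PARSE.match('0.0.1.dev1').groupdict())
    # {'major': '0', 'minor': '0', 'patch': '1', 'release': 'dev', 'candidate': '1'}
    print(PARSE.match('0.1.0').groupdict())
    # {'major': '0', 'minor': '1', 'patch': '0', 'release': None, 'candidate': None}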
diff --git a/setup.py b/setup.py index cdfd471..aa66a24 100644 --- a/setup.py +++ b/setup.py @@ -114,6 +114,6 @@ test_suite='tests', tests_require=tests_require, url='https://github.com/sintel-dev/Draco', - version='0.1.0.dev0', + version='0.1.0.dev1', zip_safe=False, ) From 3ac5e0d64a3aa2f2e0fad59bfa9b44c22a6a4ae3 Mon Sep 17 00:00:00 2001 From: sarahmish Date: Sat, 1 Jan 2022 22:48:38 +0300 Subject: [PATCH 149/171] prepare draco release --- HISTORY.md | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index c4a54ff..c03e597 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -2,18 +2,18 @@ ## 0.1.0 - 2021-01-01 -Rename ``GreenGuard`` to ``Draco``. - * First release on ``draco-ml`` PyPI -## 0.3.0 - 2021-01-22 +## Previous GreenGuard development + +### 0.3.0 - 2021-01-22 This release increases the supported version of python to `3.8` and also includes changes in the installation requirements, where ``pandas`` and ``scikit-optimize`` packages have been updated to support higher versions. This changes come together with the newer versions of ``MLBlocks`` and ``MLPrimitives``. -### Internal Improvements +#### Internal Improvements * Fix ``run_benchmark`` generating properly the ``init_hyperparameters`` for the pipelines. * New ``FPR`` metric. @@ -21,16 +21,16 @@ of ``MLBlocks`` and ``MLPrimitives``. * Multiple benchmarking metrics allowed. * Multiple ``tpr`` or ``threshold`` values allowed for the benchmark. -## 0.2.6 - 2020-10-23 +### 0.2.6 - 2020-10-23 * Fix ``mkdir`` when exporting to ``csv`` file the benchmark results. * Intermediate steps for the pipelines with demo notebooks for each pipeline. -### Resolved Issues +#### Resolved Issues * Issue #50: Expose partial outputs and executions in the ``GreenGuardPipeline``. -## 0.2.5 - 2020-10-09 +### 0.2.5 - 2020-10-09 With this release we include: @@ -39,50 +39,50 @@ templates against problems with different window size and resample rules. * `summarize_results`: A function that given a `csv` file generates a `xlsx` file with a summary tab and a detailed tab with the results from `run_benchmark`. -## 0.2.4 - 2020-09-25 +### 0.2.4 - 2020-09-25 * Fix dependency errors -## 0.2.3 - 2020-08-10 +### 0.2.3 - 2020-08-10 * Added benchmarking module. -## 0.2.2 - 2020-07-10 +### 0.2.2 - 2020-07-10 -### Internal Improvements +#### Internal Improvements * Added github actions. -### Resolved Issues +#### Resolved Issues * Issue #27: Cache Splits pre-processed data on disk -## 0.2.1 - 2020-06-16 +### 0.2.1 - 2020-06-16 With this release we give the possibility to the user to specify more than one template when creating a GreenGuardPipeline. When the `tune` method of this is called, an instance of BTBSession is returned and it is in charge of selecting the templates and tuning their hyperparameters until achieving the best pipeline. -### Internal Improvements +#### Internal Improvements * Resample by filename inside the `CSVLoader` to avoid oversampling of data that will not be used. * Select targets now allows them to be equal. * Fixed the csv filename format. * Upgraded to BTB. 
-### Bug Fixes +#### Bug Fixes * Issue #33: Wrong default datetime format -### Resolved Issues +#### Resolved Issues * Issue #35: Select targets is too strict * Issue #36: resample by filename inside csvloader * Issue #39: Upgrade BTB * Issue #41: Fix CSV filename format -## 0.2.0 - 2020-02-14 +### 0.2.0 - 2020-02-14 First stable release: @@ -91,6 +91,6 @@ First stable release: * optimized pipeline tuning * documentation and tutorials -## 0.1.0 +### 0.1.0 * First release on PyPI From 76e7b73b075d785d7d635de105b16599a71f1383 Mon Sep 17 00:00:00 2001 From: sarahmish Date: Sat, 1 Jan 2022 22:55:21 +0300 Subject: [PATCH 150/171] =?UTF-8?q?Bump=20version:=200.1.0.dev1=20?= =?UTF-8?q?=E2=86=92=200.1.0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- draco/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/draco/__init__.py b/draco/__init__.py index 3caad34..759653d 100644 --- a/draco/__init__.py +++ b/draco/__init__.py @@ -4,7 +4,7 @@ __author__ = """MIT Data To AI Lab""" __email__ = 'dailabmit@gmail.com' -__version__ = '0.1.0.dev1' +__version__ = '0.1.0' import os diff --git a/setup.cfg b/setup.cfg index 30db171..3ea3079 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.1.0.dev1 +current_version = 0.1.0 commit = True tag = True parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? diff --git a/setup.py b/setup.py index aa66a24..8e2b494 100644 --- a/setup.py +++ b/setup.py @@ -114,6 +114,6 @@ test_suite='tests', tests_require=tests_require, url='https://github.com/sintel-dev/Draco', - version='0.1.0.dev1', + version='0.1.0', zip_safe=False, ) From be822782964ace5e3bfaf19cbce37b770ece7dac Mon Sep 17 00:00:00 2001 From: sarahmish Date: Sat, 1 Jan 2022 23:02:26 +0300 Subject: [PATCH 151/171] change python 3.6 to 3.7 --- docker/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docker/Dockerfile b/docker/Dockerfile index a086f2b..c750e55 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.6 +FROM python:3.7 ARG UID=1000 EXPOSE 8888 From b06a6baf59bc3d0be7f446c264a73105e0da5ecf Mon Sep 17 00:00:00 2001 From: sarahmish Date: Sat, 1 Jan 2022 23:19:28 +0300 Subject: [PATCH 152/171] =?UTF-8?q?Bump=20version:=200.1.0=20=E2=86=92=200?= =?UTF-8?q?.1.1.dev0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- draco/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/draco/__init__.py b/draco/__init__.py index 759653d..db53888 100644 --- a/draco/__init__.py +++ b/draco/__init__.py @@ -4,7 +4,7 @@ __author__ = """MIT Data To AI Lab""" __email__ = 'dailabmit@gmail.com' -__version__ = '0.1.0' +__version__ = '0.1.1.dev0' import os diff --git a/setup.cfg b/setup.cfg index 3ea3079..597575d 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.1.0 +current_version = 0.1.1.dev0 commit = True tag = True parse = (?P\d+)\.(?P\d+)\.(?P\d+)(\.(?P[a-z]+)(?P\d+))? 
diff --git a/setup.py b/setup.py index 8e2b494..fefbd90 100644 --- a/setup.py +++ b/setup.py @@ -114,6 +114,6 @@ test_suite='tests', tests_require=tests_require, url='https://github.com/sintel-dev/Draco', - version='0.1.0', + version='0.1.1.dev0', zip_safe=False, ) From 5776afa819b5dc490eda0181c0f6e3217a324d41 Mon Sep 17 00:00:00 2001 From: sarahmish Date: Sun, 2 Jan 2022 19:52:19 +0300 Subject: [PATCH 153/171] amend date --- HISTORY.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/HISTORY.md b/HISTORY.md index c03e597..84a28d5 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,6 +1,6 @@ # History -## 0.1.0 - 2021-01-01 +## 0.1.0 - 2022-01-01 * First release on ``draco-ml`` PyPI From 2c120276acf665de03ea949ffb1037ed17e7628d Mon Sep 17 00:00:00 2001 From: Sarah Alnegheimish <40212131+sarahmish@users.noreply.github.com> Date: Mon, 31 Jan 2022 11:16:34 +0000 Subject: [PATCH 154/171] python3.6 tests (#65) * add python3.6 tests --- .github/workflows/tests.yml | 12 ++++++------ setup.py | 3 ++- tox.ini | 4 +++- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 5659465..d4c79b9 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -11,7 +11,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: [3.7, 3.8] + python-version: [3.6, 3.7, 3.8] os: [ubuntu-latest] steps: - uses: actions/checkout@v1 @@ -30,7 +30,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: [3.7, 3.8] + python-version: [3.6, 3.7, 3.8] os: [ubuntu-latest] steps: - uses: actions/checkout@v1 @@ -52,8 +52,8 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: [3.7, 3.8] - os: [ubuntu-latest, macos-latest] + python-version: [3.6, 3.7, 3.8] + os: [ubuntu-latest, macos-10.15] steps: - uses: actions/checkout@v1 - name: Set up Python ${{ matrix.python-version }} @@ -71,7 +71,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: [3.7, 3.8] + python-version: [3.6, 3.7, 3.8] os: [ubuntu-latest] steps: - uses: actions/checkout@v1 @@ -90,7 +90,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - python-version: [3.7, 3.8] + python-version: [3.6, 3.7, 3.8] os: [ubuntu-latest] steps: - uses: actions/checkout@v1 diff --git a/setup.py b/setup.py index fefbd90..0157b7c 100644 --- a/setup.py +++ b/setup.py @@ -87,6 +87,7 @@ 'License :: OSI Approved :: MIT License', 'Natural Language :: English', 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', ], @@ -109,7 +110,7 @@ long_description_content_type='text/markdown', name='draco-ml', packages=find_packages(include=['draco', 'draco.*']), - python_requires='>=3.7,<3.9', + python_requires='>=3.6,<3.9', setup_requires=setup_requires, test_suite='tests', tests_require=tests_require, diff --git a/tox.ini b/tox.ini index 88295ee..0068931 100644 --- a/tox.ini +++ b/tox.ini @@ -5,17 +5,19 @@ commands = /usr/bin/env make docs [tox] -envlist = py3{7,8}-{lint,readme,unit,minimum} +envlist = py3{6,7,8}-{lint,readme,unit,minimum} [travis] python = 3.8: py38-lint, py38-readme, py38-unit, py38-minimum, py38-tutorials 3.7: py37-lint, py37-readme, py37-unit, py37-minimum, py37-tutorials + 3.6: py36-lint, py36-readme, py36-unit, py36-minimum, py36-tutorials [gh-actions] python = 3.8: py38-lint, py38-readme, py38-unit, py38-minimum, py38-tutorials 3.7: py37-lint, py37-readme, py37-unit, py37-minimum, 
py37-tutorials + 3.6: py36-lint, py36-readme, py36-unit, py36-minimum, py36-tutorials [testenv] passenv = CI TRAVIS TRAVIS_* From 97ef930a4be356b4aa5cc64374122515c5cf53cb Mon Sep 17 00:00:00 2001 From: Sarah Alnegheimish <40212131+sarahmish@users.noreply.github.com> Date: Mon, 7 Mar 2022 11:25:21 -0500 Subject: [PATCH 155/171] Reorganize pipelines (#64) * change greenguard to draco * update badge to gh workflow * fix logo * draco preprocessing sub-pipelining * add pipelines (wip) * reorganize (wip) * rename pipelines * fix lstm pipeline * fix lint * add regression pipeline * add RUL dataset and notebooks * return to master docker configuration * remove draco pipeline as primitive --- .gitignore | 2 + README.md | 14 +- draco/__init__.py | 6 +- draco/demo.py | 35 +- draco/pipeline.py | 32 +- .../classes/normalize_dfs_xgb_classifier.json | 65 - .../classes/unstack_dfs_xgb_classifier.json | 78 - .../unstack_normalize_dfs_xgb_classifier.json | 69 - draco/pipelines/dfs_xgb/dfs_xgb.json | 29 + ...fs_xgb_prob_with_double_normalization.json | 42 + .../dfs_xgb/dfs_xgb_prob_with_unstack.json | 50 + ...s_xgb_prob_with_unstack_normalization.json | 49 + .../dfs_xgb_with_double_normalization.json | 37 + .../dfs_xgb/dfs_xgb_with_normalization.json | 29 + .../dfs_xgb/dfs_xgb_with_unstack.json | 45 + .../dfs_xgb_with_unstack_normalization.json | 44 + .../disabled/dfs_xgb_classifier.json | 64 - .../normalize_dfs_xgb_classifier.json | 46 - .../disabled/resample_dfs_xgb_classifier.json | 83 - .../resample_unstack_dfs_xgb_classifier.json | 78 - ...ack_double_lstm_timeseries_classifier.json | 123 - ...le_unstack_lstm_timeseries_classifier.json | 123 - ..._unstack_normalize_dfs_xgb_classifier.json | 69 - .../double_lstm.json} | 12 +- .../double_lstm/double_lstm_prob.json | 98 + .../double_lstm_prob_with_unstack.json} | 37 +- .../double_lstm_with_unstack.json} | 37 +- .../lstm.json} | 10 +- draco/pipelines/lstm/lstm_prob.json | 98 + .../lstm_prob_with_unstack.json} | 37 +- .../lstm_with_unstack.json} | 37 +- .../lstm_regressor/lstm_regressor.json | 91 + .../lstm_regressor_with_unstack.json | 106 + .../double_entity_normalization.json} | 16 +- .../entity_dataframe.json} | 24 +- .../preprocessing/entity_normalization.json | 20 + draco/pipelines/preprocessing/unstack.json | 43 + .../normalize_dfs_xgb_classifier.json | 70 - .../unstack_dfs_xgb_classifier.json | 83 - .../unstack_normalize_dfs_xgb_classifier.json | 74 - ...nstacked_normalize_dfs_xgb_classifier.json | 39 - draco/primitives/mlblocks.MLPipeline.json | 37 + tests/test_benchmark.py | 2 +- tests/test_pipeline.py | 20 +- tutorials/01_Draco_Machine_Learning.ipynb | 246 +- tutorials/03_Benchmarking.ipynb | 10 +- tutorials/04_Draco_Regression_Pipeline.ipynb | 793 ++++++ .../Convert NASA CMAPSS to Draco Format.ipynb | 406 +++ ...> dfs_xgb_with_double_normalization.ipynb} | 1270 ++++----- ... 
dfs_xgb_with_unstack_normalization.ipynb} | 946 +++---- .../pipelines/double_lstm_with_unstack.ipynb | 2375 ++++++++++++++++ .../lstm_regressor_with_unstack.ipynb | 2499 +++++++++++++++++ tutorials/pipelines/lstm_with_unstack.ipynb | 2249 +++++++++++++++ ...ck_double_lstm_timeseries_classifier.ipynb | 2481 ---------------- .../unstack_lstm_timeseries_classifier.ipynb | 2355 ---------------- 55 files changed, 10270 insertions(+), 7563 deletions(-) delete mode 100644 draco/pipelines/classes/normalize_dfs_xgb_classifier.json delete mode 100644 draco/pipelines/classes/unstack_dfs_xgb_classifier.json delete mode 100644 draco/pipelines/classes/unstack_normalize_dfs_xgb_classifier.json create mode 100644 draco/pipelines/dfs_xgb/dfs_xgb.json create mode 100644 draco/pipelines/dfs_xgb/dfs_xgb_prob_with_double_normalization.json create mode 100644 draco/pipelines/dfs_xgb/dfs_xgb_prob_with_unstack.json create mode 100644 draco/pipelines/dfs_xgb/dfs_xgb_prob_with_unstack_normalization.json create mode 100644 draco/pipelines/dfs_xgb/dfs_xgb_with_double_normalization.json create mode 100644 draco/pipelines/dfs_xgb/dfs_xgb_with_normalization.json create mode 100644 draco/pipelines/dfs_xgb/dfs_xgb_with_unstack.json create mode 100644 draco/pipelines/dfs_xgb/dfs_xgb_with_unstack_normalization.json delete mode 100644 draco/pipelines/disabled/dfs_xgb_classifier.json delete mode 100644 draco/pipelines/disabled/normalize_dfs_xgb_classifier.json delete mode 100644 draco/pipelines/disabled/resample_dfs_xgb_classifier.json delete mode 100644 draco/pipelines/disabled/resample_unstack_dfs_xgb_classifier.json delete mode 100644 draco/pipelines/disabled/resample_unstack_double_lstm_timeseries_classifier.json delete mode 100644 draco/pipelines/disabled/resample_unstack_lstm_timeseries_classifier.json delete mode 100644 draco/pipelines/disabled/resample_unstack_normalize_dfs_xgb_classifier.json rename draco/pipelines/{unstacked/unstacked_double_lstm_timeseries_classifier.json => double_lstm/double_lstm.json} (91%) create mode 100644 draco/pipelines/double_lstm/double_lstm_prob.json rename draco/pipelines/{probability/unstack_double_lstm_timeseries_classifier.json => double_lstm/double_lstm_prob_with_unstack.json} (79%) rename draco/pipelines/{classes/unstack_double_lstm_timeseries_classifier.json => double_lstm/double_lstm_with_unstack.json} (77%) rename draco/pipelines/{unstacked/unstacked_lstm_timeseries_classifier.json => lstm/lstm.json} (92%) create mode 100644 draco/pipelines/lstm/lstm_prob.json rename draco/pipelines/{probability/unstack_lstm_timeseries_classifier.json => lstm/lstm_prob_with_unstack.json} (79%) rename draco/pipelines/{classes/unstack_lstm_timeseries_classifier.json => lstm/lstm_with_unstack.json} (77%) create mode 100644 draco/pipelines/lstm_regressor/lstm_regressor.json create mode 100644 draco/pipelines/lstm_regressor/lstm_regressor_with_unstack.json rename draco/pipelines/{disabled/resample_normalize_dfs_xgb_classifier.json => preprocessing/double_entity_normalization.json} (73%) rename draco/pipelines/{unstacked/unstacked_dfs_xgb_classifier.json => preprocessing/entity_dataframe.json} (50%) create mode 100644 draco/pipelines/preprocessing/entity_normalization.json create mode 100644 draco/pipelines/preprocessing/unstack.json delete mode 100644 draco/pipelines/probability/normalize_dfs_xgb_classifier.json delete mode 100644 draco/pipelines/probability/unstack_dfs_xgb_classifier.json delete mode 100644 draco/pipelines/probability/unstack_normalize_dfs_xgb_classifier.json delete mode 100644 
draco/pipelines/unstacked/unstacked_normalize_dfs_xgb_classifier.json create mode 100644 draco/primitives/mlblocks.MLPipeline.json create mode 100644 tutorials/04_Draco_Regression_Pipeline.ipynb create mode 100644 tutorials/Convert NASA CMAPSS to Draco Format.ipynb rename tutorials/pipelines/{normalize_dfs_xgb_classifier.ipynb => dfs_xgb_with_double_normalization.ipynb} (58%) rename tutorials/pipelines/{unstack_normalize_dfs_xgb_classifier.ipynb => dfs_xgb_with_unstack_normalization.ipynb} (68%) create mode 100644 tutorials/pipelines/double_lstm_with_unstack.ipynb create mode 100644 tutorials/pipelines/lstm_regressor_with_unstack.ipynb create mode 100644 tutorials/pipelines/lstm_with_unstack.ipynb delete mode 100644 tutorials/pipelines/unstack_double_lstm_timeseries_classifier.ipynb delete mode 100644 tutorials/pipelines/unstack_lstm_timeseries_classifier.ipynb diff --git a/.gitignore b/.gitignore index fe2c47a..b4e035b 100644 --- a/.gitignore +++ b/.gitignore @@ -112,3 +112,5 @@ notebooks-private/ scripts/ dask-worker-space/ tutorials/*.pkl + +*.DS_Store diff --git a/README.md b/README.md index 70eb0fe..2d398e4 100644 --- a/README.md +++ b/README.md @@ -220,18 +220,18 @@ The returned `pipeline` variable will be `list` containing the names of all the available in the Draco system: ``` -['classes.unstack_double_lstm_timeseries_classifier', - 'classes.unstack_lstm_timeseries_classifier', - 'classes.unstack_normalize_dfs_xgb_classifier', - 'classes.unstack_dfs_xgb_classifier', - 'classes.normalize_dfs_xgb_classifier'] +['dfs_xgb', + 'dfs_xgb_with_unstack', + 'dfs_xgb_with_normalization', + 'dfs_xgb_with_unstack_normalization', + 'dfs_xgb_prob_with_unstack_normalization'] ``` For the rest of this tutorial, we will select and use the pipeline -`classes.normalize_dfs_xgb_classifier` as our template. +`dfs_xgb_with_unstack_normalization` as our template. ```python3 -pipeline_name = 'classes.normalize_dfs_xgb_classifier' +pipeline_name = 'dfs_xgb_with_unstack_normalization' ``` ## 3. Fitting the Pipeline diff --git a/draco/__init__.py b/draco/__init__.py index db53888..a1a5e8a 100644 --- a/draco/__init__.py +++ b/draco/__init__.py @@ -11,9 +11,11 @@ from draco.pipeline import DracoPipeline, get_pipelines _BASE_PATH = os.path.abspath(os.path.dirname(__file__)) -MLBLOCKS_PIPELINES = os.path.join(_BASE_PATH, 'pipelines') MLBLOCKS_PRIMITIVES = os.path.join(_BASE_PATH, 'primitives') - +MLBLOCKS_PIPELINES = tuple( + dirname + for dirname, _, _ in os.walk(os.path.join(_BASE_PATH, 'pipelines')) +) __all__ = ( 'DracoPipeline', diff --git a/draco/demo.py b/draco/demo.py index f7848e5..dcd4126 100644 --- a/draco/demo.py +++ b/draco/demo.py @@ -10,6 +10,17 @@ S3_URL = 'https://d3-ai-greenguard.s3.amazonaws.com/' DEMO_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'demo') +_FILES = { + 'DEFAULT': [ + ('target_times', 'cutoff_time'), + ('readings', 'timestamp') + ], + 'RUL': [ + ('rul_train_target_times', 'cutoff_time'), + ('rul_test_target_times', 'cutoff_time'), + ('rul_readings', 'timestamp') + ] +} def _load_or_download(filename, dates): filename += '.csv.gz' @@ -27,23 +38,35 @@ def _load_or_download(filename, dates): return data -def load_demo(load_readings=True): +def load_demo(name='default', load_readings=True): """Load the demo included in the Draco project. The first time that this function is executed, the data will be downloaded and cached inside the `draco/demo` folder. Subsequent calls will load the cached data instead of downloading it again. 
+
+    Args:
+        name (str):
+            Name of the dataset to load. If "RUL", load NASA's CMAPSS dataset
+            https://ti.arc.nasa.gov/tech/dash/groups/pcoe/prognostic-data-repository/#turbofan.
+            If "default", load the default demo.
+        load_readings (bool):
+            Whether to load the ``readings`` table or not.
 
     Returns:
         tuple[pandas.DataFrame]:
             target_times and readings tables
     """
-    target_times = _load_or_download('target_times', 'cutoff_time')
-    if load_readings:
-        readings = _load_or_download('readings', 'timestamp')
-        return target_times, readings
+    files = _FILES[name.upper()]
 
-    return target_times
+    if not load_readings:
+        files = files[:-1]
+
+    output = list()
+    for filename, dates in files:
+        output.append(_load_or_download(filename, dates))
+
+    return tuple(output)
 
 
 def generate_raw_readings(output_path='demo'):
diff --git a/draco/pipeline.py b/draco/pipeline.py
index b50567b..98fb3d7 100644
--- a/draco/pipeline.py
+++ b/draco/pipeline.py
@@ -54,7 +54,7 @@ def __setstate__(self, state):
 Sequential.__setstate__ = __setstate__
 
 
-def get_pipelines(pattern='', path=False, pipeline_type='classes'):
+def get_pipelines(pattern='', path=False, pipeline_type=None):
     """Get the list of available pipelines.
 
     Optionally filter the names using a pattern or obtain
@@ -66,9 +66,8 @@
         path (bool):
             Whether to return a dictionary containing the pipeline paths
             instead of only a list with the names.
-        pipeline_type (str):
-            The pipeline category to filter by (`classes`, `probability` and `unstacked`).
-            Defaults to `classes`.
+        pipeline_type (str or list[str]):
+            The pipeline category to filter. Defaults to `None`.
 
     Return:
         list or dict:
             If `path=True`, return a dict containing the pipeline names as keys
             and their absolute paths as values. 
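A minimal usage sketch of the two entry points reworked above, assuming the layout introduced by this patch (the 'RUL' dataset name comes from the new `_FILES` mapping and the category names from the new `pipelines/` folders; the calls themselves are illustrative):

    from draco import get_pipelines
    from draco.demo import load_demo

    # Default demo: the DEFAULT entry of _FILES yields a (target_times, readings) tuple
    target_times, readings = load_demo()

    # NASA CMAPSS demo: the RUL entry yields train and test target_times plus readings
    train_times, test_times, rul_readings = load_demo('RUL')

    # List every available pipeline, only one category, or name -> JSON path mappings
    all_names = get_pipelines()
    lstm_names = get_pipelines(pipeline_type='lstm')
    dfs_paths = get_pipelines(pattern='dfs_xgb', path=True)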
""" + if isinstance(pipeline_type, str): + pipeline_type = [pipeline_type] + elif pipeline_type is None: + pipeline_type = os.listdir(PIPELINES_DIR) + pipelines = dict() - pipelines_dir = os.path.join(PIPELINES_DIR, pipeline_type) - - for filename in os.listdir(pipelines_dir): - if filename.endswith('.json') and pattern in filename: - name = os.path.basename(filename)[:-len('.json')] - name = f'{pipeline_type}.{name}' - pipeline_path = os.path.join(pipelines_dir, filename) - pipelines[name] = pipeline_path + pipelines_dir = [ + os.path.join(PIPELINES_DIR, ptype) + for ptype in pipeline_type + if ptype != 'preprocessing' + ] + + for pdir in pipelines_dir: + for filename in os.listdir(pdir): + if filename.endswith('.json') and pattern in filename: + name = os.path.basename(filename)[:-len('.json')] + pipeline_path = os.path.join(pdir, filename) + pipelines[name] = pipeline_path if not path: pipelines = list(pipelines) diff --git a/draco/pipelines/classes/normalize_dfs_xgb_classifier.json b/draco/pipelines/classes/normalize_dfs_xgb_classifier.json deleted file mode 100644 index 8039d12..0000000 --- a/draco/pipelines/classes/normalize_dfs_xgb_classifier.json +++ /dev/null @@ -1,65 +0,0 @@ -{ - "primitives": [ - "pandas.DataFrame.resample", - "featuretools.EntitySet.entity_from_dataframe", - "featuretools.EntitySet.normalize_entity", - "featuretools.EntitySet.normalize_entity", - "featuretools.dfs", - "mlprimitives.custom.feature_extraction.CategoricalEncoder", - "xgboost.XGBClassifier" - ], - "init_params": { - "pandas.DataFrame.resample#1": { - "rule": "600s", - "on": "timestamp", - "groupby": [ - "turbine_id", - "signal_id" - ], - "aggregation": "mean", - "reset_index": true - }, - "featuretools.EntitySet.entity_from_dataframe#1": { - "entity_id": "readings", - "index": "reading_id", - "make_index": true, - "time_index": "timestamp" - }, - "featuretools.EntitySet.normalize_entity#1": { - "base_entity_id": "readings", - "new_entity_id": "turbines", - "index": "turbine_id", - "make_time_index": false - }, - "featuretools.EntitySet.normalize_entity#2": { - "base_entity_id": "readings", - "new_entity_id": "signals", - "index": "signal_id", - "make_time_index": false - }, - "featuretools.dfs#1": { - "target_entity": "turbines", - "index": "turbine_id", - "time_index": "cutoff_time", - "encode": false, - "max_depth": -1, - "copy": true, - "verbose": false, - "n_jobs": 1, - "training_window": "1d" - } - }, - "input_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "featuretools.EntitySet.entity_from_dataframe#1": { - "dataframe": "readings" - } - }, - "output_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - } - } -} diff --git a/draco/pipelines/classes/unstack_dfs_xgb_classifier.json b/draco/pipelines/classes/unstack_dfs_xgb_classifier.json deleted file mode 100644 index 60be686..0000000 --- a/draco/pipelines/classes/unstack_dfs_xgb_classifier.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "primitives": [ - "pandas.DataFrame.resample", - "pandas.DataFrame.unstack", - "featuretools.EntitySet.entity_from_dataframe", - "featuretools.EntitySet.entity_from_dataframe", - "featuretools.EntitySet.add_relationship", - "featuretools.dfs", - "mlprimitives.custom.feature_extraction.CategoricalEncoder", - "xgboost.XGBClassifier" - ], - "init_params": { - "pandas.DataFrame.resample#1": { - "rule": "600s", - "on": "timestamp", - "groupby": [ - "turbine_id", - "signal_id" - ], - "aggregation": "mean", - "reset_index": false - }, - "pandas.DataFrame.unstack#1": { - "level": 
"signal_id", - "reset_index": true - }, - "featuretools.EntitySet.entity_from_dataframe#1": { - "entity_id": "readings", - "index": "reading_id", - "make_index": true, - "time_index": "timestamp" - }, - "featuretools.EntitySet.entity_from_dataframe#2": { - "entity_id": "turbines", - "index": "turbine_id", - "make_index": false - }, - "featuretools.EntitySet.add_relationship#1": { - "parent": "turbines", - "parent_column": "turbine_id", - "child": "readings", - "child_column": "turbine_id" - }, - "featuretools.dfs#1": { - "target_entity": "turbines", - "index": "turbine_id", - "time_index": "cutoff_time", - "encode": false, - "max_depth": -1, - "copy": true, - "verbose": true, - "n_jobs": 1, - "training_window": "1d" - } - }, - "input_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - }, - "featuretools.EntitySet.entity_from_dataframe#1": { - "dataframe": "readings" - }, - "featuretools.EntitySet.entity_from_dataframe#2": { - "dataframe": "turbines" - } - }, - "output_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - } - } -} diff --git a/draco/pipelines/classes/unstack_normalize_dfs_xgb_classifier.json b/draco/pipelines/classes/unstack_normalize_dfs_xgb_classifier.json deleted file mode 100644 index 5c82d77..0000000 --- a/draco/pipelines/classes/unstack_normalize_dfs_xgb_classifier.json +++ /dev/null @@ -1,69 +0,0 @@ -{ - "primitives": [ - "pandas.DataFrame.resample", - "pandas.DataFrame.unstack", - "featuretools.EntitySet.entity_from_dataframe", - "featuretools.EntitySet.normalize_entity", - "featuretools.dfs", - "mlprimitives.custom.feature_extraction.CategoricalEncoder", - "xgboost.XGBClassifier" - ], - "init_params": { - "pandas.DataFrame.resample#1": { - "rule": "600s", - "on": "timestamp", - "groupby": [ - "turbine_id", - "signal_id" - ], - "aggregation": "mean", - "reset_index": false - }, - "pandas.DataFrame.unstack#1": { - "level": "signal_id", - "reset_index": true - }, - "featuretools.EntitySet.entity_from_dataframe#1": { - "entity_id": "readings", - "index": "reading_id", - "make_index": true, - "time_index": "timestamp" - }, - "featuretools.EntitySet.normalize_entity#1": { - "base_entity_id": "readings", - "new_entity_id": "turbines", - "index": "turbine_id", - "make_time_index": false - }, - "featuretools.dfs#1": { - "target_entity": "turbines", - "index": "turbine_id", - "time_index": "cutoff_time", - "encode": false, - "max_depth": -1, - "copy": true, - "verbose": false, - "n_jobs": 1, - "training_window": "1d" - } - }, - "input_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - }, - "featuretools.EntitySet.entity_from_dataframe#1": { - "dataframe": "readings" - } - }, - "output_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - } - } -} diff --git a/draco/pipelines/dfs_xgb/dfs_xgb.json b/draco/pipelines/dfs_xgb/dfs_xgb.json new file mode 100644 index 0000000..4cb3cbf --- /dev/null +++ b/draco/pipelines/dfs_xgb/dfs_xgb.json @@ -0,0 +1,29 @@ +{ + "primitives": [ + "mlblocks.MLPipeline", + "featuretools.dfs", + "mlprimitives.custom.feature_extraction.CategoricalEncoder", + "xgboost.XGBClassifier" + ], + "init_params": { + "mlblocks.MLPipeline#1": { + "pipeline": "preprocessing.entity_dataframe" + }, + "featuretools.dfs#1": { + "target_entity": "turbines", + "index": "turbine_id", + "time_index": 
"cutoff_time", + "encode": false, + "max_depth": -1, + "copy": true, + "verbose": false, + "n_jobs": 1, + "training_window": "1d" + } + }, + "input_names": { + "mlblocks.MLPipeline#1": { + "dataframe": "readings" + } + } +} diff --git a/draco/pipelines/dfs_xgb/dfs_xgb_prob_with_double_normalization.json b/draco/pipelines/dfs_xgb/dfs_xgb_prob_with_double_normalization.json new file mode 100644 index 0000000..4231115 --- /dev/null +++ b/draco/pipelines/dfs_xgb/dfs_xgb_prob_with_double_normalization.json @@ -0,0 +1,42 @@ +{ + "primitives": [ + "mlblocks.MLPipeline", + "featuretools.dfs", + "mlprimitives.custom.feature_extraction.CategoricalEncoder", + "xgboost.XGBClassifier:probabilities", + "numpy.take" + ], + "init_params": { + "mlblocks.MLPipeline#1": { + "pipeline": "preprocessing.double_entity_normalization", + "input_names": { + "pandas.DataFrame.resample#1": { + "X": "df" + }, + "featuretools.EntitySet.entity_from_dataframe#1": { + "dataframe": "readings" + } + } + }, + "featuretools.dfs#1": { + "target_entity": "turbines", + "index": "turbine_id", + "time_index": "cutoff_time", + "encode": false, + "max_depth": -1, + "copy": true, + "verbose": false, + "n_jobs": 1, + "training_window": "1d" + }, + "numpy.take#1": { + "indices": 1, + "axis": 1 + } + }, + "input_names": { + "mlblocks.MLPipeline#1": { + "X": "readings" + } + } +} diff --git a/draco/pipelines/dfs_xgb/dfs_xgb_prob_with_unstack.json b/draco/pipelines/dfs_xgb/dfs_xgb_prob_with_unstack.json new file mode 100644 index 0000000..03ef141 --- /dev/null +++ b/draco/pipelines/dfs_xgb/dfs_xgb_prob_with_unstack.json @@ -0,0 +1,50 @@ +{ + "primitives": [ + "mlblocks.MLPipeline", + "mlblocks.MLPipeline", + "featuretools.dfs", + "mlprimitives.custom.feature_extraction.CategoricalEncoder", + "xgboost.XGBClassifier:probabilities", + "numpy.take" + ], + "init_params": { + "mlblocks.MLPipeline#1": { + "pipeline": "preprocessing.unstack", + "input_names": { + "pandas.DataFrame.resample#1": { + "X": "df" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + } + } + }, + "mlblocks.MLPipeline#2": { + "pipeline": "preprocessing.entity_dataframe" + }, + "featuretools.dfs#1": { + "target_entity": "turbines", + "index": "turbine_id", + "time_index": "cutoff_time", + "encode": false, + "max_depth": -1, + "copy": true, + "verbose": false, + "n_jobs": 1, + "training_window": "1d" + }, + "numpy.take#1": { + "indices": 1, + "axis": 1 + } + }, + "input_names": { + "mlblocks.MLPipeline#1": { + "X": "readings" + }, + "mlblocks.MLPipeline#2": { + "dataframe": "readings", + "turbines": "turbines" + } + } +} diff --git a/draco/pipelines/dfs_xgb/dfs_xgb_prob_with_unstack_normalization.json b/draco/pipelines/dfs_xgb/dfs_xgb_prob_with_unstack_normalization.json new file mode 100644 index 0000000..ca0c4fa --- /dev/null +++ b/draco/pipelines/dfs_xgb/dfs_xgb_prob_with_unstack_normalization.json @@ -0,0 +1,49 @@ +{ + "primitives": [ + "mlblocks.MLPipeline", + "mlblocks.MLPipeline", + "featuretools.dfs", + "mlprimitives.custom.feature_extraction.CategoricalEncoder", + "xgboost.XGBClassifier:probabilities", + "numpy.take" + ], + "init_params": { + "mlblocks.MLPipeline#1": { + "pipeline": "preprocessing.unstack", + "input_names": { + "pandas.DataFrame.resample#1": { + "X": "df" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + } + } + }, + "mlblocks.MLPipeline#2": { + "pipeline": "preprocessing.entity_normalization" + }, + "featuretools.dfs#1": { + "target_entity": "turbines", + "index": "turbine_id", + "time_index": "cutoff_time", + "encode": false, + 
"max_depth": -1, + "copy": true, + "verbose": false, + "n_jobs": 1, + "training_window": "1d" + }, + "numpy.take#1": { + "indices": 1, + "axis": 1 + } + }, + "input_names": { + "mlblocks.MLPipeline#1": { + "X": "readings" + }, + "mlblocks.MLPipeline#2": { + "dataframe": "readings" + } + } +} diff --git a/draco/pipelines/dfs_xgb/dfs_xgb_with_double_normalization.json b/draco/pipelines/dfs_xgb/dfs_xgb_with_double_normalization.json new file mode 100644 index 0000000..82ae325 --- /dev/null +++ b/draco/pipelines/dfs_xgb/dfs_xgb_with_double_normalization.json @@ -0,0 +1,37 @@ +{ + "primitives": [ + "mlblocks.MLPipeline", + "featuretools.dfs", + "mlprimitives.custom.feature_extraction.CategoricalEncoder", + "xgboost.XGBClassifier" + ], + "init_params": { + "mlblocks.MLPipeline#1": { + "pipeline": "preprocessing.double_entity_normalization", + "input_names": { + "pandas.DataFrame.resample#1": { + "X": "df" + }, + "featuretools.EntitySet.entity_from_dataframe#1": { + "dataframe": "readings" + } + } + }, + "featuretools.dfs#1": { + "target_entity": "turbines", + "index": "turbine_id", + "time_index": "cutoff_time", + "encode": false, + "max_depth": -1, + "copy": true, + "verbose": false, + "n_jobs": 1, + "training_window": "1d" + } + }, + "input_names": { + "mlblocks.MLPipeline#1": { + "X": "readings" + } + } +} diff --git a/draco/pipelines/dfs_xgb/dfs_xgb_with_normalization.json b/draco/pipelines/dfs_xgb/dfs_xgb_with_normalization.json new file mode 100644 index 0000000..d9d7911 --- /dev/null +++ b/draco/pipelines/dfs_xgb/dfs_xgb_with_normalization.json @@ -0,0 +1,29 @@ +{ + "primitives": [ + "mlblocks.MLPipeline", + "featuretools.dfs", + "mlprimitives.custom.feature_extraction.CategoricalEncoder", + "xgboost.XGBClassifier" + ], + "init_params": { + "mlblocks.MLPipeline#1": { + "pipeline": "preprocessing.entity_normalization" + }, + "featuretools.dfs#1": { + "target_entity": "turbines", + "index": "turbine_id", + "time_index": "cutoff_time", + "encode": false, + "max_depth": -1, + "copy": true, + "verbose": false, + "n_jobs": 1, + "training_window": "1d" + } + }, + "input_names": { + "mlblocks.MLPipeline#1": { + "dataframe": "readings" + } + } +} diff --git a/draco/pipelines/dfs_xgb/dfs_xgb_with_unstack.json b/draco/pipelines/dfs_xgb/dfs_xgb_with_unstack.json new file mode 100644 index 0000000..dd01f23 --- /dev/null +++ b/draco/pipelines/dfs_xgb/dfs_xgb_with_unstack.json @@ -0,0 +1,45 @@ +{ + "primitives": [ + "mlblocks.MLPipeline", + "mlblocks.MLPipeline", + "featuretools.dfs", + "mlprimitives.custom.feature_extraction.CategoricalEncoder", + "xgboost.XGBClassifier" + ], + "init_params": { + "mlblocks.MLPipeline#1": { + "pipeline": "preprocessing.unstack", + "input_names": { + "pandas.DataFrame.resample#1": { + "X": "df" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + } + } + }, + "mlblocks.MLPipeline#2": { + "pipeline": "preprocessing.entity_dataframe" + }, + "featuretools.dfs#1": { + "target_entity": "turbines", + "index": "turbine_id", + "time_index": "cutoff_time", + "encode": false, + "max_depth": -1, + "copy": true, + "verbose": false, + "n_jobs": 1, + "training_window": "1d" + } + }, + "input_names": { + "mlblocks.MLPipeline#1": { + "X": "readings" + }, + "mlblocks.MLPipeline#2": { + "dataframe": "readings", + "turbines": "turbines" + } + } +} diff --git a/draco/pipelines/dfs_xgb/dfs_xgb_with_unstack_normalization.json b/draco/pipelines/dfs_xgb/dfs_xgb_with_unstack_normalization.json new file mode 100644 index 0000000..87e6999 --- /dev/null +++ 
b/draco/pipelines/dfs_xgb/dfs_xgb_with_unstack_normalization.json @@ -0,0 +1,44 @@ +{ + "primitives": [ + "mlblocks.MLPipeline", + "mlblocks.MLPipeline", + "featuretools.dfs", + "mlprimitives.custom.feature_extraction.CategoricalEncoder", + "xgboost.XGBClassifier" + ], + "init_params": { + "mlblocks.MLPipeline#1": { + "pipeline": "preprocessing.unstack", + "input_names": { + "pandas.DataFrame.resample#1": { + "X": "df" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + } + } + }, + "mlblocks.MLPipeline#2": { + "pipeline": "preprocessing.entity_normalization" + }, + "featuretools.dfs#1": { + "target_entity": "turbines", + "index": "turbine_id", + "time_index": "cutoff_time", + "encode": false, + "max_depth": -1, + "copy": true, + "verbose": false, + "n_jobs": 1, + "training_window": "1d" + } + }, + "input_names": { + "mlblocks.MLPipeline#1": { + "X": "readings" + }, + "mlblocks.MLPipeline#2": { + "dataframe": "readings" + } + } +} diff --git a/draco/pipelines/disabled/dfs_xgb_classifier.json b/draco/pipelines/disabled/dfs_xgb_classifier.json deleted file mode 100644 index f725e3b..0000000 --- a/draco/pipelines/disabled/dfs_xgb_classifier.json +++ /dev/null @@ -1,64 +0,0 @@ -{ - "primitives": [ - "featuretools.EntitySet.entity_from_dataframe", - "featuretools.EntitySet.entity_from_dataframe", - "featuretools.EntitySet.entity_from_dataframe", - "featuretools.EntitySet.add_relationship", - "featuretools.EntitySet.add_relationship", - "featuretools.dfs", - "mlprimitives.custom.feature_extraction.CategoricalEncoder", - "xgboost.XGBClassifier" - ], - "init_params": { - "featuretools.EntitySet.entity_from_dataframe#1": { - "entity_id": "readings", - "index": "reading_id", - "make_index": true, - "time_index": "timestamp" - }, - "featuretools.EntitySet.entity_from_dataframe#2": { - "entity_id": "turbines", - "index": "turbine_id", - "make_index": false - }, - "featuretools.EntitySet.entity_from_dataframe#3": { - "entity_id": "signals", - "index": "signal_id", - "make_index": false - }, - "featuretools.EntitySet.add_relationship#1": { - "parent": "turbines", - "parent_column": "turbine_id", - "child": "readings", - "child_column": "turbine_id" - }, - "featuretools.EntitySet.add_relationship#2": { - "parent": "signals", - "parent_column": "signal_id", - "child": "readings", - "child_column": "signal_id" - }, - "featuretools.dfs#1": { - "target_entity": "turbines", - "index": "turbine_id", - "time_index": "cutoff_time", - "encode": false, - "max_depth": -1, - "copy": true, - "verbose": true, - "n_jobs": 1, - "training_window": "3d" - } - }, - "input_names": { - "featuretools.EntitySet.entity_from_dataframe#1": { - "dataframe": "readings" - }, - "featuretools.EntitySet.entity_from_dataframe#2": { - "dataframe": "turbines" - }, - "featuretools.EntitySet.entity_from_dataframe#3": { - "dataframe": "signals" - } - } -} diff --git a/draco/pipelines/disabled/normalize_dfs_xgb_classifier.json b/draco/pipelines/disabled/normalize_dfs_xgb_classifier.json deleted file mode 100644 index 0622163..0000000 --- a/draco/pipelines/disabled/normalize_dfs_xgb_classifier.json +++ /dev/null @@ -1,46 +0,0 @@ -{ - "primitives": [ - "featuretools.EntitySet.entity_from_dataframe", - "featuretools.EntitySet.normalize_entity", - "featuretools.EntitySet.normalize_entity", - "featuretools.dfs", - "mlprimitives.custom.feature_extraction.CategoricalEncoder", - "xgboost.XGBClassifier" - ], - "init_params": { - "featuretools.EntitySet.entity_from_dataframe#1": { - "entity_id": "readings", - "index": "reading_id", - 
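
Taken together, the files above replace the old classes/probability/unstacked template families with a single dfs_xgb family whose preprocessing steps live in reusable nested pipelines. A sketch of how one of these templates would be exercised end to end; this assumes draco.demo.load_demo exists and returns the demo target_times and readings, and that DracoPipeline keeps the fit/predict signature shown in the tutorial notebook further below:

    from draco.demo import load_demo
    from draco.pipeline import DracoPipeline, get_pipelines

    # List the dfs_xgb templates shipped with the package.
    print(get_pipelines(pipeline_type='dfs_xgb'))

    # Demo target_times (turbine_id, cutoff_time, target) and raw readings.
    target_times, readings = load_demo()

    pipeline = DracoPipeline('dfs_xgb_with_unstack_normalization')
    pipeline.fit(target_times, readings)
    predictions = pipeline.predict(target_times, readings)
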
"make_index": true, - "time_index": "timestamp" - }, - "featuretools.EntitySet.normalize_entity#1": { - "base_entity_id": "readings", - "new_entity_id": "turbines", - "index": "turbine_id", - "make_time_index": false - }, - "featuretools.EntitySet.normalize_entity#2": { - "base_entity_id": "readings", - "new_entity_id": "signals", - "index": "signal_id", - "make_time_index": false - }, - "featuretools.dfs#1": { - "target_entity": "turbines", - "index": "turbine_id", - "time_index": "cutoff_time", - "encode": false, - "max_depth": -1, - "copy": true, - "verbose": true, - "n_jobs": 1, - "training_window": "3d" - } - }, - "input_names": { - "featuretools.EntitySet.entity_from_dataframe#1": { - "dataframe": "readings" - } - } -} diff --git a/draco/pipelines/disabled/resample_dfs_xgb_classifier.json b/draco/pipelines/disabled/resample_dfs_xgb_classifier.json deleted file mode 100644 index 390e0b4..0000000 --- a/draco/pipelines/disabled/resample_dfs_xgb_classifier.json +++ /dev/null @@ -1,83 +0,0 @@ -{ - "primitives": [ - "pandas.DataFrame.resample", - "featuretools.EntitySet.entity_from_dataframe", - "featuretools.EntitySet.entity_from_dataframe", - "featuretools.EntitySet.entity_from_dataframe", - "featuretools.EntitySet.add_relationship", - "featuretools.EntitySet.add_relationship", - "featuretools.dfs", - "mlprimitives.custom.feature_extraction.CategoricalEncoder", - "xgboost.XGBClassifier" - ], - "init_params": { - "pandas.DataFrame.resample#1": { - "rule": "1h", - "on": "timestamp", - "groupby": [ - "turbine_id", - "signal_id" - ], - "aggregation": "mean", - "reset_index": true - }, - "featuretools.EntitySet.entity_from_dataframe#1": { - "entity_id": "readings", - "index": "reading_id", - "make_index": true, - "time_index": "timestamp" - }, - "featuretools.EntitySet.entity_from_dataframe#2": { - "entity_id": "turbines", - "index": "turbine_id", - "make_index": false - }, - "featuretools.EntitySet.entity_from_dataframe#3": { - "entity_id": "signals", - "index": "signal_id", - "make_index": false - }, - "featuretools.EntitySet.add_relationship#1": { - "parent": "turbines", - "parent_column": "turbine_id", - "child": "readings", - "child_column": "turbine_id" - }, - "featuretools.EntitySet.add_relationship#2": { - "parent": "signals", - "parent_column": "signal_id", - "child": "readings", - "child_column": "signal_id" - }, - "featuretools.dfs#1": { - "target_entity": "turbines", - "index": "turbine_id", - "time_index": "cutoff_time", - "encode": false, - "max_depth": -1, - "copy": true, - "verbose": true, - "n_jobs": 1, - "training_window": "3d" - } - }, - "input_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "featuretools.EntitySet.entity_from_dataframe#1": { - "dataframe": "readings" - }, - "featuretools.EntitySet.entity_from_dataframe#2": { - "dataframe": "turbines" - }, - "featuretools.EntitySet.entity_from_dataframe#3": { - "dataframe": "signals" - } - }, - "output_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - } - } -} diff --git a/draco/pipelines/disabled/resample_unstack_dfs_xgb_classifier.json b/draco/pipelines/disabled/resample_unstack_dfs_xgb_classifier.json deleted file mode 100644 index 7775208..0000000 --- a/draco/pipelines/disabled/resample_unstack_dfs_xgb_classifier.json +++ /dev/null @@ -1,78 +0,0 @@ -{ - "primitives": [ - "pandas.DataFrame.resample", - "pandas.DataFrame.unstack", - "featuretools.EntitySet.entity_from_dataframe", - "featuretools.EntitySet.entity_from_dataframe", - "featuretools.EntitySet.add_relationship", - 
"featuretools.dfs", - "mlprimitives.custom.feature_extraction.CategoricalEncoder", - "xgboost.XGBClassifier" - ], - "init_params": { - "pandas.DataFrame.resample#1": { - "rule": "1h", - "on": "timestamp", - "groupby": [ - "turbine_id", - "signal_id" - ], - "aggregation": "mean", - "reset_index": false - }, - "pandas.DataFrame.unstack#1": { - "level": "signal_id", - "reset_index": true - }, - "featuretools.EntitySet.entity_from_dataframe#1": { - "entity_id": "readings", - "index": "reading_id", - "make_index": true, - "time_index": "timestamp" - }, - "featuretools.EntitySet.entity_from_dataframe#2": { - "entity_id": "turbines", - "index": "turbine_id", - "make_index": false - }, - "featuretools.EntitySet.add_relationship#1": { - "parent": "turbines", - "parent_column": "turbine_id", - "child": "readings", - "child_column": "turbine_id" - }, - "featuretools.dfs#1": { - "target_entity": "turbines", - "index": "turbine_id", - "time_index": "cutoff_time", - "encode": false, - "max_depth": -1, - "copy": true, - "verbose": true, - "n_jobs": 1, - "training_window": "3d" - } - }, - "input_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - }, - "featuretools.EntitySet.entity_from_dataframe#1": { - "dataframe": "readings" - }, - "featuretools.EntitySet.entity_from_dataframe#2": { - "dataframe": "turbines" - } - }, - "output_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - } - } -} diff --git a/draco/pipelines/disabled/resample_unstack_double_lstm_timeseries_classifier.json b/draco/pipelines/disabled/resample_unstack_double_lstm_timeseries_classifier.json deleted file mode 100644 index 75dadc9..0000000 --- a/draco/pipelines/disabled/resample_unstack_double_lstm_timeseries_classifier.json +++ /dev/null @@ -1,123 +0,0 @@ -{ - "primitives": [ - "pandas.DataFrame.resample", - "pandas.DataFrame.unstack", - "pandas.DataFrame.pop", - "pandas.DataFrame.pop", - "sklearn.impute.SimpleImputer", - "sklearn.preprocessing.MinMaxScaler", - "pandas.DataFrame", - "pandas.DataFrame.set", - "pandas.DataFrame.set", - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences", - "keras.Sequential.DoubleLSTMTimeSeriesClassifier" - ], - "init_params": { - "pandas.DataFrame.resample#1": { - "rule": "3600s", - "on": "timestamp", - "groupby": [ - "turbine_id", - "signal_id" - ], - "aggregation": "mean", - "reset_index": false - }, - "pandas.DataFrame.unstack#1": { - "level": "signal_id", - "reset_index": true - }, - "pandas.DataFrame.pop#1": { - "item": "turbine_id" - }, - "pandas.DataFrame.pop#2": { - "item": "timestamp" - }, - "sklearn.preprocessing.MinMaxScaler#1": { - "feature_range": [ - -1, - 1 - ] - }, - "pandas.DataFrame#1": { - "index": null, - "columns": null - }, - "pandas.DataFrame.set#1": { - "key": "turbine_id" - }, - "pandas.DataFrame.set#2": { - "key": "timestamp" - }, - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { - "window_size": 72, - "cutoff_time": "cutoff_time", - "time_index": "timestamp" - }, - "keras.Sequential.DoubleLSTMTimeSeriesClassifier": { - "epochs": 20, - "verbose": true, - "input_shape": [ - 72, - 97 - ] - } - }, - "input_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - }, - "pandas.DataFrame.pop#1": { - "X": "readings" - }, - "pandas.DataFrame.pop#2": { - "X": "readings" - }, - "sklearn.impute.SimpleImputer#1": { - "X": "readings" - }, - 
"sklearn.preprocessing.MinMaxScaler#1": { - "X": "readings" - }, - "pandas.DataFrame#1": { - "X": "readings" - }, - "pandas.DataFrame.set#1": { - "X": "readings", - "value": "turbine_id" - }, - "pandas.DataFrame.set#2": { - "X": "readings", - "value": "timestamp" - }, - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { - "timeseries": "readings" - } - }, - "output_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - }, - "pandas.DataFrame.pop#1": { - "item": "turbine_id" - }, - "pandas.DataFrame.pop#2": { - "item": "timestamp" - }, - "sklearn.impute.SimpleImputer#1": { - "X": "readings" - }, - "sklearn.preprocessing.MinMaxScaler#1": { - "X": "readings" - }, - "pandas.DataFrame#1": { - "X": "readings" - } - } -} diff --git a/draco/pipelines/disabled/resample_unstack_lstm_timeseries_classifier.json b/draco/pipelines/disabled/resample_unstack_lstm_timeseries_classifier.json deleted file mode 100644 index e33e83b..0000000 --- a/draco/pipelines/disabled/resample_unstack_lstm_timeseries_classifier.json +++ /dev/null @@ -1,123 +0,0 @@ -{ - "primitives": [ - "pandas.DataFrame.resample", - "pandas.DataFrame.unstack", - "pandas.DataFrame.pop", - "pandas.DataFrame.pop", - "sklearn.impute.SimpleImputer", - "sklearn.preprocessing.MinMaxScaler", - "pandas.DataFrame", - "pandas.DataFrame.set", - "pandas.DataFrame.set", - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences", - "keras.Sequential.LSTMTimeSeriesClassifier" - ], - "init_params": { - "pandas.DataFrame.resample#1": { - "rule": "600s", - "on": "timestamp", - "groupby": [ - "turbine_id", - "signal_id" - ], - "aggregation": "mean", - "reset_index": false - }, - "pandas.DataFrame.unstack#1": { - "level": "signal_id", - "reset_index": true - }, - "pandas.DataFrame.pop#1": { - "item": "turbine_id" - }, - "pandas.DataFrame.pop#2": { - "item": "timestamp" - }, - "sklearn.preprocessing.MinMaxScaler#1": { - "feature_range": [ - -1, - 1 - ] - }, - "pandas.DataFrame#1": { - "index": null, - "columns": null - }, - "pandas.DataFrame.set#1": { - "key": "turbine_id" - }, - "pandas.DataFrame.set#2": { - "key": "timestamp" - }, - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { - "window_size": 144, - "cutoff_time": "cutoff_time", - "time_index": "timestamp" - }, - "keras.Sequential.LSTMTimeSeriesClassifier": { - "epochs": 35, - "verbose": true, - "input_shape": [ - 144, - 26 - ] - } - }, - "input_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - }, - "pandas.DataFrame.pop#1": { - "X": "readings" - }, - "pandas.DataFrame.pop#2": { - "X": "readings" - }, - "sklearn.impute.SimpleImputer#1": { - "X": "readings" - }, - "sklearn.preprocessing.MinMaxScaler#1": { - "X": "readings" - }, - "pandas.DataFrame#1": { - "X": "readings" - }, - "pandas.DataFrame.set#1": { - "X": "readings", - "value": "turbine_id" - }, - "pandas.DataFrame.set#2": { - "X": "readings", - "value": "timestamp" - }, - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { - "timeseries": "readings" - } - }, - "output_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - }, - "pandas.DataFrame.pop#1": { - "item": "turbine_id" - }, - "pandas.DataFrame.pop#2": { - "item": "timestamp" - }, - "sklearn.impute.SimpleImputer#1": { - "X": "readings" - }, - "sklearn.preprocessing.MinMaxScaler#1": { - "X": 
"readings" - }, - "pandas.DataFrame#1": { - "X": "readings" - } - } -} diff --git a/draco/pipelines/disabled/resample_unstack_normalize_dfs_xgb_classifier.json b/draco/pipelines/disabled/resample_unstack_normalize_dfs_xgb_classifier.json deleted file mode 100644 index 4cbe1df..0000000 --- a/draco/pipelines/disabled/resample_unstack_normalize_dfs_xgb_classifier.json +++ /dev/null @@ -1,69 +0,0 @@ -{ - "primitives": [ - "pandas.DataFrame.resample", - "pandas.DataFrame.unstack", - "featuretools.EntitySet.entity_from_dataframe", - "featuretools.EntitySet.normalize_entity", - "featuretools.dfs", - "mlprimitives.custom.feature_extraction.CategoricalEncoder", - "xgboost.XGBClassifier" - ], - "init_params": { - "pandas.DataFrame.resample#1": { - "rule": "1h", - "on": "timestamp", - "groupby": [ - "turbine_id", - "signal_id" - ], - "aggregation": "mean", - "reset_index": false - }, - "pandas.DataFrame.unstack#1": { - "level": "signal_id", - "reset_index": true - }, - "featuretools.EntitySet.entity_from_dataframe#1": { - "entity_id": "readings", - "index": "reading_id", - "make_index": true, - "time_index": "timestamp" - }, - "featuretools.EntitySet.normalize_entity#1": { - "base_entity_id": "readings", - "new_entity_id": "turbines", - "index": "turbine_id", - "make_time_index": false - }, - "featuretools.dfs#1": { - "target_entity": "turbines", - "index": "turbine_id", - "time_index": "cutoff_time", - "encode": false, - "max_depth": -1, - "copy": true, - "verbose": true, - "n_jobs": 1, - "training_window": "3d" - } - }, - "input_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - }, - "featuretools.EntitySet.entity_from_dataframe#1": { - "dataframe": "readings" - } - }, - "output_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - } - } -} diff --git a/draco/pipelines/unstacked/unstacked_double_lstm_timeseries_classifier.json b/draco/pipelines/double_lstm/double_lstm.json similarity index 91% rename from draco/pipelines/unstacked/unstacked_double_lstm_timeseries_classifier.json rename to draco/pipelines/double_lstm/double_lstm.json index bf3065f..e3be8a5 100644 --- a/draco/pipelines/unstacked/unstacked_double_lstm_timeseries_classifier.json +++ b/draco/pipelines/double_lstm/double_lstm.json @@ -34,17 +34,13 @@ "key": "timestamp" }, "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { - "window_size": 72, + "window_size": 24, "cutoff_time": "cutoff_time", "time_index": "timestamp" }, - "keras.Sequential.DoubleLSTMTimeSeriesClassifier": { - "epochs": 20, - "verbose": true, - "input_shape": [ - 72, - 97 - ] + "keras.Sequential.DoubleLSTMTimeSeriesClassifier#1": { + "epochs": 35, + "verbose": false } }, "input_names": { diff --git a/draco/pipelines/double_lstm/double_lstm_prob.json b/draco/pipelines/double_lstm/double_lstm_prob.json new file mode 100644 index 0000000..a118af0 --- /dev/null +++ b/draco/pipelines/double_lstm/double_lstm_prob.json @@ -0,0 +1,98 @@ +{ + "primitives": [ + "pandas.DataFrame.pop", + "pandas.DataFrame.pop", + "sklearn.impute.SimpleImputer", + "sklearn.preprocessing.MinMaxScaler", + "pandas.DataFrame", + "pandas.DataFrame.set", + "pandas.DataFrame.set", + "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences", + "keras.Sequential.DoubleLSTMTimeSeriesClassifier", + "numpy.take" + ], + "init_params": { + "pandas.DataFrame.pop#1": { + "item": "turbine_id" + }, + "pandas.DataFrame.pop#2": { + "item": 
"timestamp" + }, + "sklearn.preprocessing.MinMaxScaler#1": { + "feature_range": [ + -1, + 1 + ] + }, + "pandas.DataFrame#1": { + "index": null, + "columns": null + }, + "pandas.DataFrame.set#1": { + "key": "turbine_id" + }, + "pandas.DataFrame.set#2": { + "key": "timestamp" + }, + "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "window_size": 24, + "cutoff_time": "cutoff_time", + "time_index": "timestamp" + }, + "keras.Sequential.DoubleLSTMTimeSeriesClassifier#1": { + "epochs": 35, + "verbose": false, + "classification": false, + "loss": "keras.losses.binary_crossentropy" + }, + "numpy.take#1": { + "indices": 1, + "axis": 1 + } + }, + "input_names": { + "pandas.DataFrame.pop#1": { + "X": "readings" + }, + "pandas.DataFrame.pop#2": { + "X": "readings" + }, + "sklearn.impute.SimpleImputer#1": { + "X": "readings" + }, + "sklearn.preprocessing.MinMaxScaler#1": { + "X": "readings" + }, + "pandas.DataFrame#1": { + "X": "readings" + }, + "pandas.DataFrame.set#1": { + "X": "readings", + "value": "turbine_id" + }, + "pandas.DataFrame.set#2": { + "X": "readings", + "value": "timestamp" + }, + "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "timeseries": "readings" + } + }, + "output_names": { + "pandas.DataFrame.pop#1": { + "item": "turbine_id" + }, + "pandas.DataFrame.pop#2": { + "item": "timestamp" + }, + "sklearn.impute.SimpleImputer#1": { + "X": "readings" + }, + "sklearn.preprocessing.MinMaxScaler#1": { + "X": "readings" + }, + "pandas.DataFrame#1": { + "X": "readings" + } + } +} diff --git a/draco/pipelines/probability/unstack_double_lstm_timeseries_classifier.json b/draco/pipelines/double_lstm/double_lstm_prob_with_unstack.json similarity index 79% rename from draco/pipelines/probability/unstack_double_lstm_timeseries_classifier.json rename to draco/pipelines/double_lstm/double_lstm_prob_with_unstack.json index ea48a87..289a794 100644 --- a/draco/pipelines/probability/unstack_double_lstm_timeseries_classifier.json +++ b/draco/pipelines/double_lstm/double_lstm_prob_with_unstack.json @@ -1,7 +1,6 @@ { "primitives": [ - "pandas.DataFrame.resample", - "pandas.DataFrame.unstack", + "mlblocks.MLPipeline", "pandas.DataFrame.pop", "pandas.DataFrame.pop", "sklearn.impute.SimpleImputer", @@ -14,19 +13,16 @@ "numpy.take" ], "init_params": { - "pandas.DataFrame.resample#1": { - "rule": "3600s", - "on": "timestamp", - "groupby": [ - "turbine_id", - "signal_id" - ], - "aggregation": "mean", - "reset_index": false - }, - "pandas.DataFrame.unstack#1": { - "level": "signal_id", - "reset_index": true + "mlblocks.MLPipeline#1": { + "pipeline": "unstack", + "input_names": { + "pandas.DataFrame.resample#1": { + "X": "df" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + } + } }, "pandas.DataFrame.pop#1": { "item": "turbine_id" @@ -67,10 +63,7 @@ } }, "input_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "pandas.DataFrame.unstack#1": { + "mlblocks.MLPipeline#1": { "X": "readings" }, "pandas.DataFrame.pop#1": { @@ -101,12 +94,6 @@ } }, "output_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - }, "pandas.DataFrame.pop#1": { "item": "turbine_id" }, diff --git a/draco/pipelines/classes/unstack_double_lstm_timeseries_classifier.json b/draco/pipelines/double_lstm/double_lstm_with_unstack.json similarity index 77% rename from draco/pipelines/classes/unstack_double_lstm_timeseries_classifier.json rename to 
draco/pipelines/double_lstm/double_lstm_with_unstack.json index dede502..1d08259 100644 --- a/draco/pipelines/classes/unstack_double_lstm_timeseries_classifier.json +++ b/draco/pipelines/double_lstm/double_lstm_with_unstack.json @@ -1,7 +1,6 @@ { "primitives": [ - "pandas.DataFrame.resample", - "pandas.DataFrame.unstack", + "mlblocks.MLPipeline", "pandas.DataFrame.pop", "pandas.DataFrame.pop", "sklearn.impute.SimpleImputer", @@ -13,19 +12,16 @@ "keras.Sequential.DoubleLSTMTimeSeriesClassifier" ], "init_params": { - "pandas.DataFrame.resample#1": { - "rule": "3600s", - "on": "timestamp", - "groupby": [ - "turbine_id", - "signal_id" - ], - "aggregation": "mean", - "reset_index": false - }, - "pandas.DataFrame.unstack#1": { - "level": "signal_id", - "reset_index": true + "mlblocks.MLPipeline#1": { + "pipeline": "unstack", + "input_names": { + "pandas.DataFrame.resample#1": { + "X": "df" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + } + } }, "pandas.DataFrame.pop#1": { "item": "turbine_id" @@ -60,10 +56,7 @@ } }, "input_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "pandas.DataFrame.unstack#1": { + "mlblocks.MLPipeline#1": { "X": "readings" }, "pandas.DataFrame.pop#1": { @@ -94,12 +87,6 @@ } }, "output_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - }, "pandas.DataFrame.pop#1": { "item": "turbine_id" }, diff --git a/draco/pipelines/unstacked/unstacked_lstm_timeseries_classifier.json b/draco/pipelines/lstm/lstm.json similarity index 92% rename from draco/pipelines/unstacked/unstacked_lstm_timeseries_classifier.json rename to draco/pipelines/lstm/lstm.json index d2cbed7..c29b1c7 100644 --- a/draco/pipelines/unstacked/unstacked_lstm_timeseries_classifier.json +++ b/draco/pipelines/lstm/lstm.json @@ -34,17 +34,13 @@ "key": "timestamp" }, "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { - "window_size": 72, + "window_size": 24, "cutoff_time": "cutoff_time", "time_index": "timestamp" }, - "keras.Sequential.LSTMTimeSeriesClassifier": { + "keras.Sequential.LSTMTimeSeriesClassifier#1": { "epochs": 35, - "verbose": true, - "input_shape": [ - 72, - 97 - ] + "verbose": false } }, "input_names": { diff --git a/draco/pipelines/lstm/lstm_prob.json b/draco/pipelines/lstm/lstm_prob.json new file mode 100644 index 0000000..17da404 --- /dev/null +++ b/draco/pipelines/lstm/lstm_prob.json @@ -0,0 +1,98 @@ +{ + "primitives": [ + "pandas.DataFrame.pop", + "pandas.DataFrame.pop", + "sklearn.impute.SimpleImputer", + "sklearn.preprocessing.MinMaxScaler", + "pandas.DataFrame", + "pandas.DataFrame.set", + "pandas.DataFrame.set", + "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences", + "keras.Sequential.LSTMTimeSeriesClassifier", + "numpy.take" + ], + "init_params": { + "pandas.DataFrame.pop#1": { + "item": "turbine_id" + }, + "pandas.DataFrame.pop#2": { + "item": "timestamp" + }, + "sklearn.preprocessing.MinMaxScaler#1": { + "feature_range": [ + -1, + 1 + ] + }, + "pandas.DataFrame#1": { + "index": null, + "columns": null + }, + "pandas.DataFrame.set#1": { + "key": "turbine_id" + }, + "pandas.DataFrame.set#2": { + "key": "timestamp" + }, + "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "window_size": 24, + "cutoff_time": "cutoff_time", + "time_index": "timestamp" + }, + "keras.Sequential.LSTMTimeSeriesClassifier#1": { + "epochs": 35, + "verbose": false, + "classification": false, + "loss": "keras.losses.binary_crossentropy" + }, + 
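
All of the lstm/double_lstm templates window the readings with mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences, now configured with window_size 24: for each cutoff_time it takes the window_size readings that precede the cutoff and stacks them into a 3-D array of shape (samples, window_size, features) for the Keras classifiers. A simplified re-implementation, for intuition only (the real primitive additionally matches windows per turbine_id):

    import numpy as np
    import pandas as pd

    readings = pd.DataFrame({
        'timestamp': pd.date_range('2020-01-01', periods=48, freq='h'),
        'signal_a': np.arange(48, dtype=float),
    })
    cutoffs = pd.Series([pd.Timestamp('2020-01-02 12:00')])

    window_size = 24
    sequences = []
    for cutoff in cutoffs:
        # Keep the last `window_size` readings strictly before the cutoff.
        window = readings[readings['timestamp'] < cutoff].tail(window_size)
        sequences.append(window[['signal_a']].to_numpy())

    X = np.stack(sequences)
    print(X.shape)  # (1, 24, 1): samples x window_size x features
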
"numpy.take#1": { + "indices": 1, + "axis": 1 + } + }, + "input_names": { + "pandas.DataFrame.pop#1": { + "X": "readings" + }, + "pandas.DataFrame.pop#2": { + "X": "readings" + }, + "sklearn.impute.SimpleImputer#1": { + "X": "readings" + }, + "sklearn.preprocessing.MinMaxScaler#1": { + "X": "readings" + }, + "pandas.DataFrame#1": { + "X": "readings" + }, + "pandas.DataFrame.set#1": { + "X": "readings", + "value": "turbine_id" + }, + "pandas.DataFrame.set#2": { + "X": "readings", + "value": "timestamp" + }, + "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "timeseries": "readings" + } + }, + "output_names": { + "pandas.DataFrame.pop#1": { + "item": "turbine_id" + }, + "pandas.DataFrame.pop#2": { + "item": "timestamp" + }, + "sklearn.impute.SimpleImputer#1": { + "X": "readings" + }, + "sklearn.preprocessing.MinMaxScaler#1": { + "X": "readings" + }, + "pandas.DataFrame#1": { + "X": "readings" + } + } +} diff --git a/draco/pipelines/probability/unstack_lstm_timeseries_classifier.json b/draco/pipelines/lstm/lstm_prob_with_unstack.json similarity index 79% rename from draco/pipelines/probability/unstack_lstm_timeseries_classifier.json rename to draco/pipelines/lstm/lstm_prob_with_unstack.json index 9272257..1ad69bc 100644 --- a/draco/pipelines/probability/unstack_lstm_timeseries_classifier.json +++ b/draco/pipelines/lstm/lstm_prob_with_unstack.json @@ -1,7 +1,6 @@ { "primitives": [ - "pandas.DataFrame.resample", - "pandas.DataFrame.unstack", + "mlblocks.MLPipeline", "pandas.DataFrame.pop", "pandas.DataFrame.pop", "sklearn.impute.SimpleImputer", @@ -14,19 +13,16 @@ "numpy.take" ], "init_params": { - "pandas.DataFrame.resample#1": { - "rule": "3600s", - "on": "timestamp", - "groupby": [ - "turbine_id", - "signal_id" - ], - "aggregation": "mean", - "reset_index": false - }, - "pandas.DataFrame.unstack#1": { - "level": "signal_id", - "reset_index": true + "mlblocks.MLPipeline#1": { + "pipeline": "unstack", + "input_names": { + "pandas.DataFrame.resample#1": { + "X": "df" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + } + } }, "pandas.DataFrame.pop#1": { "item": "turbine_id" @@ -67,10 +63,7 @@ } }, "input_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "pandas.DataFrame.unstack#1": { + "mlblocks.MLPipeline#1": { "X": "readings" }, "pandas.DataFrame.pop#1": { @@ -101,12 +94,6 @@ } }, "output_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - }, "pandas.DataFrame.pop#1": { "item": "turbine_id" }, diff --git a/draco/pipelines/classes/unstack_lstm_timeseries_classifier.json b/draco/pipelines/lstm/lstm_with_unstack.json similarity index 77% rename from draco/pipelines/classes/unstack_lstm_timeseries_classifier.json rename to draco/pipelines/lstm/lstm_with_unstack.json index ab9dd99..18c486a 100644 --- a/draco/pipelines/classes/unstack_lstm_timeseries_classifier.json +++ b/draco/pipelines/lstm/lstm_with_unstack.json @@ -1,7 +1,6 @@ { "primitives": [ - "pandas.DataFrame.resample", - "pandas.DataFrame.unstack", + "mlblocks.MLPipeline", "pandas.DataFrame.pop", "pandas.DataFrame.pop", "sklearn.impute.SimpleImputer", @@ -13,19 +12,16 @@ "keras.Sequential.LSTMTimeSeriesClassifier" ], "init_params": { - "pandas.DataFrame.resample#1": { - "rule": "3600s", - "on": "timestamp", - "groupby": [ - "turbine_id", - "signal_id" - ], - "aggregation": "mean", - "reset_index": false - }, - "pandas.DataFrame.unstack#1": { - "level": "signal_id", - "reset_index": true + "mlblocks.MLPipeline#1": 
{ + "pipeline": "unstack", + "input_names": { + "pandas.DataFrame.resample#1": { + "X": "df" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + } + } }, "pandas.DataFrame.pop#1": { "item": "turbine_id" @@ -60,10 +56,7 @@ } }, "input_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "pandas.DataFrame.unstack#1": { + "mlblocks.MLPipeline#1": { "X": "readings" }, "pandas.DataFrame.pop#1": { @@ -94,12 +87,6 @@ } }, "output_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - }, "pandas.DataFrame.pop#1": { "item": "turbine_id" }, diff --git a/draco/pipelines/lstm_regressor/lstm_regressor.json b/draco/pipelines/lstm_regressor/lstm_regressor.json new file mode 100644 index 0000000..77ddb1e --- /dev/null +++ b/draco/pipelines/lstm_regressor/lstm_regressor.json @@ -0,0 +1,91 @@ +{ + "primitives": [ + "pandas.DataFrame.pop", + "pandas.DataFrame.pop", + "sklearn.impute.SimpleImputer", + "sklearn.preprocessing.MinMaxScaler", + "pandas.DataFrame", + "pandas.DataFrame.set", + "pandas.DataFrame.set", + "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences", + "keras.Sequential.LSTMTimeSeriesRegressor" + ], + "init_params": { + "pandas.DataFrame.pop#1": { + "item": "turbine_id" + }, + "pandas.DataFrame.pop#2": { + "item": "timestamp" + }, + "sklearn.preprocessing.MinMaxScaler#1": { + "feature_range": [ + -1, + 1 + ] + }, + "pandas.DataFrame#1": { + "index": null, + "columns": null + }, + "pandas.DataFrame.set#1": { + "key": "turbine_id" + }, + "pandas.DataFrame.set#2": { + "key": "timestamp" + }, + "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "window_size": 24, + "cutoff_time": "cutoff_time", + "time_index": "timestamp" + }, + "keras.Sequential.LSTMTimeSeriesRegressor#1": { + "epochs": 35, + "verbose": false + } + }, + "input_names": { + "pandas.DataFrame.pop#1": { + "X": "readings" + }, + "pandas.DataFrame.pop#2": { + "X": "readings" + }, + "sklearn.impute.SimpleImputer#1": { + "X": "readings" + }, + "sklearn.preprocessing.MinMaxScaler#1": { + "X": "readings" + }, + "pandas.DataFrame#1": { + "X": "readings" + }, + "pandas.DataFrame.set#1": { + "X": "readings", + "value": "turbine_id" + }, + "pandas.DataFrame.set#2": { + "X": "readings", + "value": "timestamp" + }, + "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "timeseries": "readings" + } + }, + "output_names": { + "pandas.DataFrame.pop#1": { + "item": "turbine_id" + }, + "pandas.DataFrame.pop#2": { + "item": "timestamp" + }, + "sklearn.impute.SimpleImputer#1": { + "X": "readings" + }, + "sklearn.preprocessing.MinMaxScaler#1": { + "X": "readings" + }, + "pandas.DataFrame#1": { + "X": "readings" + } + } +} diff --git a/draco/pipelines/lstm_regressor/lstm_regressor_with_unstack.json b/draco/pipelines/lstm_regressor/lstm_regressor_with_unstack.json new file mode 100644 index 0000000..d546827 --- /dev/null +++ b/draco/pipelines/lstm_regressor/lstm_regressor_with_unstack.json @@ -0,0 +1,106 @@ +{ + "primitives": [ + "mlblocks.MLPipeline", + "pandas.DataFrame.pop", + "pandas.DataFrame.pop", + "sklearn.impute.SimpleImputer", + "sklearn.preprocessing.MinMaxScaler", + "pandas.DataFrame", + "pandas.DataFrame.set", + "pandas.DataFrame.set", + "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences", + "keras.Sequential.LSTMTimeSeriesRegressor" + ], + "init_params": { + "mlblocks.MLPipeline#1": { + "pipeline": "unstack", + "input_names": { + "pandas.DataFrame.resample#1": { + 
"X": "df" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + } + } + }, + "pandas.DataFrame.pop#1": { + "item": "turbine_id" + }, + "pandas.DataFrame.pop#2": { + "item": "timestamp" + }, + "sklearn.preprocessing.MinMaxScaler#1": { + "feature_range": [ + -1, + 1 + ] + }, + "pandas.DataFrame#1": { + "index": null, + "columns": null + }, + "pandas.DataFrame.set#1": { + "key": "turbine_id" + }, + "pandas.DataFrame.set#2": { + "key": "timestamp" + }, + "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "window_size": 24, + "cutoff_time": "cutoff_time", + "time_index": "timestamp" + }, + "keras.Sequential.LSTMTimeSeriesRegressor#1": { + "epochs": 35, + "verbose": false + } + }, + "input_names": { + "mlblocks.MLPipeline#1": { + "X": "readings" + }, + "pandas.DataFrame.pop#1": { + "X": "readings" + }, + "pandas.DataFrame.pop#2": { + "X": "readings" + }, + "sklearn.impute.SimpleImputer#1": { + "X": "readings" + }, + "sklearn.preprocessing.MinMaxScaler#1": { + "X": "readings" + }, + "pandas.DataFrame#1": { + "X": "readings" + }, + "pandas.DataFrame.set#1": { + "X": "readings", + "value": "turbine_id" + }, + "pandas.DataFrame.set#2": { + "X": "readings", + "value": "timestamp" + }, + "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "timeseries": "readings" + } + }, + "output_names": { + "pandas.DataFrame.pop#1": { + "item": "turbine_id" + }, + "pandas.DataFrame.pop#2": { + "item": "timestamp" + }, + "sklearn.impute.SimpleImputer#1": { + "X": "readings" + }, + "sklearn.preprocessing.MinMaxScaler#1": { + "X": "readings" + }, + "pandas.DataFrame#1": { + "X": "readings" + } + } +} diff --git a/draco/pipelines/disabled/resample_normalize_dfs_xgb_classifier.json b/draco/pipelines/preprocessing/double_entity_normalization.json similarity index 73% rename from draco/pipelines/disabled/resample_normalize_dfs_xgb_classifier.json rename to draco/pipelines/preprocessing/double_entity_normalization.json index 3d7d4d2..1438bbe 100644 --- a/draco/pipelines/disabled/resample_normalize_dfs_xgb_classifier.json +++ b/draco/pipelines/preprocessing/double_entity_normalization.json @@ -3,10 +3,7 @@ "pandas.DataFrame.resample", "featuretools.EntitySet.entity_from_dataframe", "featuretools.EntitySet.normalize_entity", - "featuretools.EntitySet.normalize_entity", - "featuretools.dfs", - "mlprimitives.custom.feature_extraction.CategoricalEncoder", - "xgboost.XGBClassifier" + "featuretools.EntitySet.normalize_entity" ], "init_params": { "pandas.DataFrame.resample#1": { @@ -36,17 +33,6 @@ "new_entity_id": "signals", "index": "signal_id", "make_time_index": false - }, - "featuretools.dfs#1": { - "target_entity": "turbines", - "index": "turbine_id", - "time_index": "cutoff_time", - "encode": false, - "max_depth": -1, - "copy": true, - "verbose": true, - "n_jobs": 1, - "training_window": "1d" } }, "input_names": { diff --git a/draco/pipelines/unstacked/unstacked_dfs_xgb_classifier.json b/draco/pipelines/preprocessing/entity_dataframe.json similarity index 50% rename from draco/pipelines/unstacked/unstacked_dfs_xgb_classifier.json rename to draco/pipelines/preprocessing/entity_dataframe.json index e26fa00..0bd238a 100644 --- a/draco/pipelines/unstacked/unstacked_dfs_xgb_classifier.json +++ b/draco/pipelines/preprocessing/entity_dataframe.json @@ -2,10 +2,7 @@ "primitives": [ "featuretools.EntitySet.entity_from_dataframe", "featuretools.EntitySet.entity_from_dataframe", - "featuretools.EntitySet.add_relationship", - "featuretools.dfs", - 
"mlprimitives.custom.feature_extraction.CategoricalEncoder", - "xgboost.XGBClassifier" + "featuretools.EntitySet.add_relationship" ], "init_params": { "featuretools.EntitySet.entity_from_dataframe#1": { @@ -24,25 +21,6 @@ "parent_column": "turbine_id", "child": "readings", "child_column": "turbine_id" - }, - "featuretools.dfs#1": { - "target_entity": "turbines", - "index": "turbine_id", - "time_index": "cutoff_time", - "encode": false, - "max_depth": -1, - "copy": true, - "verbose": true, - "n_jobs": 1, - "training_window": "3d" - } - }, - "input_names": { - "featuretools.EntitySet.entity_from_dataframe#1": { - "dataframe": "readings" - }, - "featuretools.EntitySet.entity_from_dataframe#2": { - "dataframe": "turbines" } } } diff --git a/draco/pipelines/preprocessing/entity_normalization.json b/draco/pipelines/preprocessing/entity_normalization.json new file mode 100644 index 0000000..9f3f3ab --- /dev/null +++ b/draco/pipelines/preprocessing/entity_normalization.json @@ -0,0 +1,20 @@ +{ + "primitives": [ + "featuretools.EntitySet.entity_from_dataframe", + "featuretools.EntitySet.normalize_entity" + ], + "init_params": { + "featuretools.EntitySet.entity_from_dataframe#1": { + "entity_id": "readings", + "index": "reading_id", + "make_index": true, + "time_index": "timestamp" + }, + "featuretools.EntitySet.normalize_entity#1": { + "base_entity_id": "readings", + "new_entity_id": "turbines", + "index": "turbine_id", + "make_time_index": false + } + } +} diff --git a/draco/pipelines/preprocessing/unstack.json b/draco/pipelines/preprocessing/unstack.json new file mode 100644 index 0000000..1acd833 --- /dev/null +++ b/draco/pipelines/preprocessing/unstack.json @@ -0,0 +1,43 @@ +{ + "primitives": [ + "pandas.DataFrame.resample", + "pandas.DataFrame.unstack" + ], + "init_params": { + "pandas.DataFrame.resample#1": { + "rule": "600s", + "on": "timestamp", + "groupby": [ + "turbine_id", + "signal_id" + ], + "aggregation": "mean", + "reset_index": false + }, + "pandas.DataFrame.unstack#1": { + "level": "signal_id", + "reset_index": true + } + }, + "input_names": { + "pandas.DataFrame.unstack#1": { + "X": "readings" + } + }, + "output_names": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + } + }, + "outputs": { + "default": [ + { + "name": "readings", + "variable": "pandas.DataFrame.unstack#1.readings" + } + ] + } +} diff --git a/draco/pipelines/probability/normalize_dfs_xgb_classifier.json b/draco/pipelines/probability/normalize_dfs_xgb_classifier.json deleted file mode 100644 index 495a5d9..0000000 --- a/draco/pipelines/probability/normalize_dfs_xgb_classifier.json +++ /dev/null @@ -1,70 +0,0 @@ -{ - "primitives": [ - "pandas.DataFrame.resample", - "featuretools.EntitySet.entity_from_dataframe", - "featuretools.EntitySet.normalize_entity", - "featuretools.EntitySet.normalize_entity", - "featuretools.dfs", - "mlprimitives.custom.feature_extraction.CategoricalEncoder", - "xgboost.XGBClassifier:probabilities", - "numpy.take" - ], - "init_params": { - "pandas.DataFrame.resample#1": { - "rule": "600s", - "on": "timestamp", - "groupby": [ - "turbine_id", - "signal_id" - ], - "aggregation": "mean", - "reset_index": true - }, - "featuretools.EntitySet.entity_from_dataframe#1": { - "entity_id": "readings", - "index": "reading_id", - "make_index": true, - "time_index": "timestamp" - }, - "featuretools.EntitySet.normalize_entity#1": { - "base_entity_id": "readings", - "new_entity_id": "turbines", - "index": "turbine_id", - "make_time_index": false 
- }, - "featuretools.EntitySet.normalize_entity#2": { - "base_entity_id": "readings", - "new_entity_id": "signals", - "index": "signal_id", - "make_time_index": false - }, - "featuretools.dfs#1": { - "target_entity": "turbines", - "index": "turbine_id", - "time_index": "cutoff_time", - "encode": false, - "max_depth": -1, - "copy": true, - "verbose": false, - "n_jobs": 1, - "training_window": "1d" - }, - "numpy.take#1": { - "indices": 1, - "axis": 1 - } - }, - "input_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "featuretools.EntitySet.entity_from_dataframe#1": { - "dataframe": "readings" - } - }, - "output_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - } - } -} diff --git a/draco/pipelines/probability/unstack_dfs_xgb_classifier.json b/draco/pipelines/probability/unstack_dfs_xgb_classifier.json deleted file mode 100644 index aedbada..0000000 --- a/draco/pipelines/probability/unstack_dfs_xgb_classifier.json +++ /dev/null @@ -1,83 +0,0 @@ -{ - "primitives": [ - "pandas.DataFrame.resample", - "pandas.DataFrame.unstack", - "featuretools.EntitySet.entity_from_dataframe", - "featuretools.EntitySet.entity_from_dataframe", - "featuretools.EntitySet.add_relationship", - "featuretools.dfs", - "mlprimitives.custom.feature_extraction.CategoricalEncoder", - "xgboost.XGBClassifier:probabilities", - "numpy.take" - ], - "init_params": { - "pandas.DataFrame.resample#1": { - "rule": "600s", - "on": "timestamp", - "groupby": [ - "turbine_id", - "signal_id" - ], - "aggregation": "mean", - "reset_index": false - }, - "pandas.DataFrame.unstack#1": { - "level": "signal_id", - "reset_index": true - }, - "featuretools.EntitySet.entity_from_dataframe#1": { - "entity_id": "readings", - "index": "reading_id", - "make_index": true, - "time_index": "timestamp" - }, - "featuretools.EntitySet.entity_from_dataframe#2": { - "entity_id": "turbines", - "index": "turbine_id", - "make_index": false - }, - "featuretools.EntitySet.add_relationship#1": { - "parent": "turbines", - "parent_column": "turbine_id", - "child": "readings", - "child_column": "turbine_id" - }, - "featuretools.dfs#1": { - "target_entity": "turbines", - "index": "turbine_id", - "time_index": "cutoff_time", - "encode": false, - "max_depth": -1, - "copy": true, - "verbose": true, - "n_jobs": 1, - "training_window": "1d" - }, - "numpy.take#1": { - "indices": 1, - "axis": 1 - } - }, - "input_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - }, - "featuretools.EntitySet.entity_from_dataframe#1": { - "dataframe": "readings" - }, - "featuretools.EntitySet.entity_from_dataframe#2": { - "dataframe": "turbines" - } - }, - "output_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - } - } -} diff --git a/draco/pipelines/probability/unstack_normalize_dfs_xgb_classifier.json b/draco/pipelines/probability/unstack_normalize_dfs_xgb_classifier.json deleted file mode 100644 index eddddd8..0000000 --- a/draco/pipelines/probability/unstack_normalize_dfs_xgb_classifier.json +++ /dev/null @@ -1,74 +0,0 @@ -{ - "primitives": [ - "pandas.DataFrame.resample", - "pandas.DataFrame.unstack", - "featuretools.EntitySet.entity_from_dataframe", - "featuretools.EntitySet.normalize_entity", - "featuretools.dfs", - "mlprimitives.custom.feature_extraction.CategoricalEncoder", - "xgboost.XGBClassifier:probabilities", - "numpy.take" - ], - "init_params": { - "pandas.DataFrame.resample#1": { - "rule": "600s", - 
"on": "timestamp", - "groupby": [ - "turbine_id", - "signal_id" - ], - "aggregation": "mean", - "reset_index": false - }, - "pandas.DataFrame.unstack#1": { - "level": "signal_id", - "reset_index": true - }, - "featuretools.EntitySet.entity_from_dataframe#1": { - "entity_id": "readings", - "index": "reading_id", - "make_index": true, - "time_index": "timestamp" - }, - "featuretools.EntitySet.normalize_entity#1": { - "base_entity_id": "readings", - "new_entity_id": "turbines", - "index": "turbine_id", - "make_time_index": false - }, - "featuretools.dfs#1": { - "target_entity": "turbines", - "index": "turbine_id", - "time_index": "cutoff_time", - "encode": false, - "max_depth": -1, - "copy": true, - "verbose": false, - "n_jobs": 1, - "training_window": "1d" - }, - "numpy.take#1": { - "indices": 1, - "axis": 1 - } - }, - "input_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - }, - "featuretools.EntitySet.entity_from_dataframe#1": { - "dataframe": "readings" - } - }, - "output_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - } - } -} diff --git a/draco/pipelines/unstacked/unstacked_normalize_dfs_xgb_classifier.json b/draco/pipelines/unstacked/unstacked_normalize_dfs_xgb_classifier.json deleted file mode 100644 index dafefd3..0000000 --- a/draco/pipelines/unstacked/unstacked_normalize_dfs_xgb_classifier.json +++ /dev/null @@ -1,39 +0,0 @@ -{ - "primitives": [ - "featuretools.EntitySet.entity_from_dataframe", - "featuretools.EntitySet.normalize_entity", - "featuretools.dfs", - "mlprimitives.custom.feature_extraction.CategoricalEncoder", - "xgboost.XGBClassifier" - ], - "init_params": { - "featuretools.EntitySet.entity_from_dataframe#1": { - "entity_id": "readings", - "index": "reading_id", - "make_index": true, - "time_index": "timestamp" - }, - "featuretools.EntitySet.normalize_entity#1": { - "base_entity_id": "readings", - "new_entity_id": "turbines", - "index": "turbine_id", - "make_time_index": false - }, - "featuretools.dfs#1": { - "target_entity": "turbines", - "index": "turbine_id", - "time_index": "cutoff_time", - "encode": false, - "max_depth": -1, - "copy": true, - "verbose": true, - "n_jobs": 1, - "training_window": "3d" - } - }, - "input_names": { - "featuretools.EntitySet.entity_from_dataframe#1": { - "dataframe": "readings" - } - } -} diff --git a/draco/primitives/mlblocks.MLPipeline.json b/draco/primitives/mlblocks.MLPipeline.json new file mode 100644 index 0000000..aee1da2 --- /dev/null +++ b/draco/primitives/mlblocks.MLPipeline.json @@ -0,0 +1,37 @@ +{ + "name": "mlblocks.MLPipeline", + "primitive": "mlblocks.MLPipeline", + "fit": { + "method": "fit", + "args": "get_fit_args" + }, + "produce": { + "method": "predict", + "args": "get_predict_args", + "output": "get_outputs" + }, + "hyperparameters": { + "fixed": { + "pipeline": { + "type": "str", + "default": null + }, + "primitives": { + "type": "list", + "default": [] + }, + "init_params": { + "type": "dict", + "default": {} + }, + "input_names": { + "type": "dict", + "default": {} + }, + "output_names": { + "type": "dict", + "default": {} + } + } + } +} \ No newline at end of file diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index 5d6f116..d88425b 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -8,7 +8,7 @@ def test_predict(): # setup templates = [ - 'probability.unstack_lstm_timeseries_classifier' + 'dfs_xgb_prob_with_unstack_normalization' ] 
window_size_rule = [ diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 725d299..f8526c9 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -6,8 +6,26 @@ from unittest.mock import patch import pandas as pd +import pytest -from draco.pipeline import DracoPipeline +from draco.pipeline import DracoPipeline, get_pipelines + + +def test_get_pipelines(): + output = get_pipelines() + assert isinstance(output, list) + + +def test_get_pipelines_type(): + output = get_pipelines(pipeline_type='lstm') + assert isinstance(output, list) + for path in output: + assert 'lstm' in path + + +def test_get_pipelines_type_error(): + with pytest.raises(FileNotFoundError): + get_pipelines(pipeline_type='does-not-exist') class TestDracoPipeline(TestCase): diff --git a/tutorials/01_Draco_Machine_Learning.ipynb b/tutorials/01_Draco_Machine_Learning.ipynb index 4a5fde7..6b1089e 100644 --- a/tutorials/01_Draco_Machine_Learning.ipynb +++ b/tutorials/01_Draco_Machine_Learning.ipynb @@ -414,28 +414,22 @@ { "data": { "text/plain": [ - "['unstacked.unstacked_normalize_dfs_xgb_classifier',\n", - " 'unstacked.unstacked_double_lstm_timeseries_classifier',\n", - " 'unstacked.unstacked_lstm_timeseries_classifier',\n", - " 'unstacked.unstacked_dfs_xgb_classifier',\n", - " 'classes.unstack_dfs_xgb_classifier',\n", - " 'classes.unstack_double_lstm_timeseries_classifier',\n", - " 'classes.normalize_dfs_xgb_classifier',\n", - " 'classes.unstack_lstm_timeseries_classifier',\n", - " 'classes.unstack_normalize_dfs_xgb_classifier',\n", - " 'disabled.resample_normalize_dfs_xgb_classifier',\n", - " 'disabled.resample_unstack_lstm_timeseries_classifier',\n", - " 'disabled.resample_unstack_normalize_dfs_xgb_classifier',\n", - " 'disabled.normalize_dfs_xgb_classifier',\n", - " 'disabled.resample_unstack_double_lstm_timeseries_classifier',\n", - " 'disabled.resample_dfs_xgb_classifier',\n", - " 'disabled.resample_unstack_dfs_xgb_classifier',\n", - " 'disabled.dfs_xgb_classifier',\n", - " 'probability.unstack_dfs_xgb_classifier',\n", - " 'probability.unstack_double_lstm_timeseries_classifier',\n", - " 'probability.normalize_dfs_xgb_classifier',\n", - " 'probability.unstack_lstm_timeseries_classifier',\n", - " 'probability.unstack_normalize_dfs_xgb_classifier']" + "['dfs_xgb_prob_with_unstack',\n", + " 'dfs_xgb_with_normalization',\n", + " 'dfs_xgb',\n", + " 'dfs_xgb_with_unstack',\n", + " 'dfs_xgb_prob_with_unstack_normalization',\n", + " 'dfs_xgb_with_unstack_normalization',\n", + " 'dfs_xgb_prob_with_double_normalization',\n", + " 'dfs_xgb_with_double_normalization',\n", + " 'double_lstm_prob_with_unstack',\n", + " 'double_lstm_prob',\n", + " 'double_lstm',\n", + " 'double_lstm_with_unstack',\n", + " 'lstm_prob_with_unstack',\n", + " 'lstm_with_unstack',\n", + " 'lstm_prob',\n", + " 'lstm']" ] }, "execution_count": 10, @@ -464,20 +458,14 @@ { "data": { "text/plain": [ - "['unstacked.unstacked_normalize_dfs_xgb_classifier',\n", - " 'unstacked.unstacked_dfs_xgb_classifier',\n", - " 'classes.unstack_dfs_xgb_classifier',\n", - " 'classes.normalize_dfs_xgb_classifier',\n", - " 'classes.unstack_normalize_dfs_xgb_classifier',\n", - " 'disabled.resample_normalize_dfs_xgb_classifier',\n", - " 'disabled.resample_unstack_normalize_dfs_xgb_classifier',\n", - " 'disabled.normalize_dfs_xgb_classifier',\n", - " 'disabled.resample_dfs_xgb_classifier',\n", - " 'disabled.resample_unstack_dfs_xgb_classifier',\n", - " 'disabled.dfs_xgb_classifier',\n", - " 'probability.unstack_dfs_xgb_classifier',\n", - " 
'probability.normalize_dfs_xgb_classifier',\n", - " 'probability.unstack_normalize_dfs_xgb_classifier']" + "['dfs_xgb_prob_with_unstack',\n", + " 'dfs_xgb_with_normalization',\n", + " 'dfs_xgb',\n", + " 'dfs_xgb_with_unstack',\n", + " 'dfs_xgb_prob_with_unstack_normalization',\n", + " 'dfs_xgb_with_unstack_normalization',\n", + " 'dfs_xgb_prob_with_double_normalization',\n", + " 'dfs_xgb_with_double_normalization']" ] }, "execution_count": 11, @@ -505,20 +493,14 @@ { "data": { "text/plain": [ - "{'unstacked.unstacked_normalize_dfs_xgb_classifier': '/Draco/draco/pipelines/unstacked/unstacked_normalize_dfs_xgb_classifier.json',\n", - " 'unstacked.unstacked_dfs_xgb_classifier': '/Draco/draco/pipelines/unstacked/unstacked_dfs_xgb_classifier.json',\n", - " 'classes.unstack_dfs_xgb_classifier': '/Draco/draco/pipelines/classes/unstack_dfs_xgb_classifier.json',\n", - " 'classes.normalize_dfs_xgb_classifier': '/Draco/draco/pipelines/classes/normalize_dfs_xgb_classifier.json',\n", - " 'classes.unstack_normalize_dfs_xgb_classifier': '/Draco/draco/pipelines/classes/unstack_normalize_dfs_xgb_classifier.json',\n", - " 'disabled.resample_normalize_dfs_xgb_classifier': '/Draco/draco/pipelines/disabled/resample_normalize_dfs_xgb_classifier.json',\n", - " 'disabled.resample_unstack_normalize_dfs_xgb_classifier': '/Draco/draco/pipelines/disabled/resample_unstack_normalize_dfs_xgb_classifier.json',\n", - " 'disabled.normalize_dfs_xgb_classifier': '/Draco/draco/pipelines/disabled/normalize_dfs_xgb_classifier.json',\n", - " 'disabled.resample_dfs_xgb_classifier': '/Draco/draco/pipelines/disabled/resample_dfs_xgb_classifier.json',\n", - " 'disabled.resample_unstack_dfs_xgb_classifier': '/Draco/draco/pipelines/disabled/resample_unstack_dfs_xgb_classifier.json',\n", - " 'disabled.dfs_xgb_classifier': '/Draco/draco/pipelines/disabled/dfs_xgb_classifier.json',\n", - " 'probability.unstack_dfs_xgb_classifier': '/Draco/draco/pipelines/probability/unstack_dfs_xgb_classifier.json',\n", - " 'probability.normalize_dfs_xgb_classifier': '/Draco/draco/pipelines/probability/normalize_dfs_xgb_classifier.json',\n", - " 'probability.unstack_normalize_dfs_xgb_classifier': '/Draco/draco/pipelines/probability/unstack_normalize_dfs_xgb_classifier.json'}" + "{'dfs_xgb_prob_with_unstack': '/Users/sarah/opt/anaconda3/envs/draco/lib/python3.7/site-packages/draco/pipelines/dfs_xgb/dfs_xgb_prob_with_unstack.json',\n", + " 'dfs_xgb_with_normalization': '/Users/sarah/opt/anaconda3/envs/draco/lib/python3.7/site-packages/draco/pipelines/dfs_xgb/dfs_xgb_with_normalization.json',\n", + " 'dfs_xgb': '/Users/sarah/opt/anaconda3/envs/draco/lib/python3.7/site-packages/draco/pipelines/dfs_xgb/dfs_xgb.json',\n", + " 'dfs_xgb_with_unstack': '/Users/sarah/opt/anaconda3/envs/draco/lib/python3.7/site-packages/draco/pipelines/dfs_xgb/dfs_xgb_with_unstack.json',\n", + " 'dfs_xgb_prob_with_unstack_normalization': '/Users/sarah/opt/anaconda3/envs/draco/lib/python3.7/site-packages/draco/pipelines/dfs_xgb/dfs_xgb_prob_with_unstack_normalization.json',\n", + " 'dfs_xgb_with_unstack_normalization': '/Users/sarah/opt/anaconda3/envs/draco/lib/python3.7/site-packages/draco/pipelines/dfs_xgb/dfs_xgb_with_unstack_normalization.json',\n", + " 'dfs_xgb_prob_with_double_normalization': '/Users/sarah/opt/anaconda3/envs/draco/lib/python3.7/site-packages/draco/pipelines/dfs_xgb/dfs_xgb_prob_with_double_normalization.json',\n", + " 'dfs_xgb_with_double_normalization': 
'/Users/sarah/opt/anaconda3/envs/draco/lib/python3.7/site-packages/draco/pipelines/dfs_xgb/dfs_xgb_with_double_normalization.json'}" ] }, "execution_count": 12, @@ -557,8 +539,8 @@ "outputs": [], "source": [ "templates = [\n", - " 'classes.unstack_normalize_dfs_xgb_classifier', \n", - " 'classes.normalize_dfs_xgb_classifier'\n", + " 'dfs_xgb_with_unstack_normalization', \n", + " 'dfs_xgb_with_double_normalization'\n", "]" ] }, @@ -631,9 +613,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:btb.session:Obtaining default configuration for classes.unstack_normalize_dfs_xgb_classifier\n", + "INFO:btb.session:Obtaining default configuration for dfs_xgb_with_unstack_normalization\n", "INFO:draco.pipeline:New configuration found:\n", - " Template: classes.unstack_normalize_dfs_xgb_classifier \n", + " Template: dfs_xgb_with_unstack_normalization \n", " Hyperparameters: \n", " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 0\n", " ('xgboost.XGBClassifier#1', 'n_estimators'): 100\n", @@ -641,26 +623,46 @@ " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.1\n", " ('xgboost.XGBClassifier#1', 'gamma'): 0.0\n", " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1\n", - "INFO:btb.session:New optimal found: classes.unstack_normalize_dfs_xgb_classifier - 0.611234532127027\n", - "INFO:btb.session:Obtaining default configuration for classes.normalize_dfs_xgb_classifier\n", - "INFO:btb.session:Generating new proposal configuration for classes.unstack_normalize_dfs_xgb_classifier\n", - "INFO:btb.session:Generating new proposal configuration for classes.normalize_dfs_xgb_classifier\n", - "INFO:btb.session:Generating new proposal configuration for classes.normalize_dfs_xgb_classifier\n" + "INFO:btb.session:New optimal found: dfs_xgb_with_unstack_normalization - 0.6117760013143775\n", + "INFO:btb.session:Obtaining default configuration for dfs_xgb_with_double_normalization\n", + "INFO:btb.session:Generating new proposal configuration for dfs_xgb_with_unstack_normalization\n", + "INFO:draco.pipeline:New configuration found:\n", + " Template: dfs_xgb_with_unstack_normalization \n", + " Hyperparameters: \n", + " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 90\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 342\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 6\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.9043352048331922\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.5258350872963311\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 9\n", + "INFO:btb.session:New optimal found: dfs_xgb_with_unstack_normalization - 0.6205571445297473\n", + "INFO:btb.session:Generating new proposal configuration for dfs_xgb_with_double_normalization\n", + "INFO:draco.pipeline:New configuration found:\n", + " Template: dfs_xgb_with_double_normalization \n", + " Hyperparameters: \n", + " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 80\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 66\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 10\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.6434375682152088\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.14135407511034503\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10\n", + "INFO:btb.session:New optimal found: dfs_xgb_with_double_normalization - 0.629513025867624\n", + "INFO:btb.session:Generating new proposal configuration for dfs_xgb_with_double_normalization\n" ] }, { "data": { "text/plain": [ - "{'id': 
'afc8e912142bc6c384231600df9874fc',\n", - " 'name': 'classes.unstack_normalize_dfs_xgb_classifier',\n", + "{'id': '452a22a136f67c575aee3341c9dc2395',\n", + " 'name': 'dfs_xgb_with_double_normalization',\n", " 'config': {('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 0,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 100,\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 3,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.1,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.0,\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1},\n", - " 'score': 0.611234532127027}" + " 'max_labels'): 80,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 66,\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 10,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.6434375682152088,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.14135407511034503,\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10},\n", + " 'score': 0.629513025867624}" ] }, "execution_count": 16, @@ -688,16 +690,16 @@ { "data": { "text/plain": [ - "{'id': 'afc8e912142bc6c384231600df9874fc',\n", - " 'name': 'classes.unstack_normalize_dfs_xgb_classifier',\n", + "{'id': '452a22a136f67c575aee3341c9dc2395',\n", + " 'name': 'dfs_xgb_with_double_normalization',\n", " 'config': {('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 0,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 100,\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 3,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.1,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.0,\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1},\n", - " 'score': 0.611234532127027}" + " 'max_labels'): 80,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 66,\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 10,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.6434375682152088,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.14135407511034503,\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10},\n", + " 'score': 0.629513025867624}" ] }, "execution_count": 17, @@ -725,12 +727,12 @@ "data": { "text/plain": [ "{('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 0,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 100,\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 3,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.1,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.0,\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1}" + " 'max_labels'): 80,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 66,\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 10,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.6434375682152088,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.14135407511034503,\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10}" ] }, "execution_count": 18, @@ -757,7 +759,7 @@ { "data": { "text/plain": [ - "'classes.unstack_normalize_dfs_xgb_classifier'" + "'dfs_xgb_with_double_normalization'" ] }, "execution_count": 19, @@ -785,7 +787,7 @@ { "data": { "text/plain": [ - "0.611234532127027" + "0.629513025867624" ] }, "execution_count": 20, @@ -815,41 +817,41 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:btb.session:Generating new proposal configuration for classes.unstack_normalize_dfs_xgb_classifier\n", - "INFO:btb.session:Generating new proposal configuration for classes.unstack_normalize_dfs_xgb_classifier\n", + "INFO:btb.session:Generating new proposal configuration for 
dfs_xgb_with_unstack_normalization\n", + "INFO:btb.session:Generating new proposal configuration for dfs_xgb_with_unstack_normalization\n", + "INFO:btb.session:Generating new proposal configuration for dfs_xgb_with_double_normalization\n", + "INFO:btb.session:Generating new proposal configuration for dfs_xgb_with_double_normalization\n", + "INFO:btb.session:Generating new proposal configuration for dfs_xgb_with_unstack_normalization\n", + "INFO:btb.session:Generating new proposal configuration for dfs_xgb_with_unstack_normalization\n", + "INFO:btb.session:Generating new proposal configuration for dfs_xgb_with_double_normalization\n", + "INFO:btb.session:Generating new proposal configuration for dfs_xgb_with_unstack_normalization\n", "INFO:draco.pipeline:New configuration found:\n", - " Template: classes.unstack_normalize_dfs_xgb_classifier \n", + " Template: dfs_xgb_with_unstack_normalization \n", " Hyperparameters: \n", - " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 97\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 364\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 7\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.6635800510691365\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.9852977392614163\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 2\n", - "INFO:btb.session:New optimal found: classes.unstack_normalize_dfs_xgb_classifier - 0.6379648413546719\n", - "INFO:btb.session:Generating new proposal configuration for classes.normalize_dfs_xgb_classifier\n", - "INFO:btb.session:Generating new proposal configuration for classes.unstack_normalize_dfs_xgb_classifier\n", - "INFO:btb.session:Generating new proposal configuration for classes.normalize_dfs_xgb_classifier\n", - "INFO:btb.session:Generating new proposal configuration for classes.unstack_normalize_dfs_xgb_classifier\n", - "INFO:btb.session:Generating new proposal configuration for classes.normalize_dfs_xgb_classifier\n", - "INFO:btb.session:Generating new proposal configuration for classes.normalize_dfs_xgb_classifier\n", - "INFO:btb.session:Generating new proposal configuration for classes.unstack_normalize_dfs_xgb_classifier\n", - "INFO:btb.session:Generating new proposal configuration for classes.normalize_dfs_xgb_classifier\n" + " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 48\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 130\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 8\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.7437898568465957\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.9963350624783064\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10\n", + "INFO:btb.session:New optimal found: dfs_xgb_with_unstack_normalization - 0.651642052400304\n", + "INFO:btb.session:Generating new proposal configuration for dfs_xgb_with_double_normalization\n", + "INFO:btb.session:Generating new proposal configuration for dfs_xgb_with_unstack_normalization\n" ] }, { "data": { "text/plain": [ - "{'id': '7e6de03286fd71179e2a2f7b3f089ffb',\n", - " 'name': 'classes.unstack_normalize_dfs_xgb_classifier',\n", + "{'id': '22ec731234212508b7b4413ccce34294',\n", + " 'name': 'dfs_xgb_with_unstack_normalization',\n", " 'config': {('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 97,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 364,\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 7,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.6635800510691365,\n", - " 
('xgboost.XGBClassifier#1', 'gamma'): 0.9852977392614163,\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 2},\n", - " 'score': 0.6379648413546719}" + " 'max_labels'): 48,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 130,\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 8,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.7437898568465957,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.9963350624783064,\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10},\n", + " 'score': 0.651642052400304}" ] }, "execution_count": 21, @@ -869,7 +871,7 @@ { "data": { "text/plain": [ - "0.6379648413546719" + "0.651642052400304" ] }, "execution_count": 22, @@ -890,12 +892,12 @@ "data": { "text/plain": [ "{('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 97,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 364,\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 7,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.6635800510691365,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.9852977392614163,\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 2}" + " 'max_labels'): 48,\n", + " ('xgboost.XGBClassifier#1', 'n_estimators'): 130,\n", + " ('xgboost.XGBClassifier#1', 'max_depth'): 8,\n", + " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.7437898568465957,\n", + " ('xgboost.XGBClassifier#1', 'gamma'): 0.9963350624783064,\n", + " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10}" ] }, "execution_count": 23, @@ -962,7 +964,7 @@ { "data": { "text/plain": [ - "0.7346938775510203" + "0.608695652173913" ] }, "execution_count": 26, @@ -1051,7 +1053,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1065,7 +1067,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.9" + "version": "3.7.11" } }, "nbformat": 4, diff --git a/tutorials/03_Benchmarking.ipynb b/tutorials/03_Benchmarking.ipynb index c7bce62..12c5e47 100644 --- a/tutorials/03_Benchmarking.ipynb +++ b/tutorials/03_Benchmarking.ipynb @@ -76,12 +76,12 @@ "outputs": [], "source": [ "templates = [\n", - " 'probability.unstack_lstm_timeseries_classifier',\n", - " 'probability.normalize_dfs_xgb_classifier'\n", + " 'lstm_prob_with_unstack',\n", + " 'dfs_xgb_prob_with_double_normalization'\n", "]\n", "window_size_rule = [('1d', '1h'), ('2d', '2h')]\n", "init_params = {\n", - " 'unstack_lstm_timeseries_classifier': {\n", + " 'lstm_prob_with_unstack': {\n", " 'keras.Sequential.LSTMTimeSeriesClassifier#1': {\n", " 'epochs': 1,\n", " }\n", @@ -389,7 +389,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -403,7 +403,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.12" + "version": "3.7.11" } }, "nbformat": 4, diff --git a/tutorials/04_Draco_Regression_Pipeline.ipynb b/tutorials/04_Draco_Regression_Pipeline.ipynb new file mode 100644 index 0000000..709c839 --- /dev/null +++ b/tutorials/04_Draco_Regression_Pipeline.ipynb @@ -0,0 +1,793 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Draco Regression Pipeline" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In this tutorial we will show you how to use Draco Regression pipelines to solve a Machine Learning problem\n", + "defined via a Target Times table.\n", + "\n", + 
"During the next steps we will:\n", + "\n", + "- Load demo Remaining Useful Life (dataset) with training and testing target times and readings\n", + "- Find available pipelines and load one of them\n", + "- Build and fit a Machine Learning pipeline\n", + "- Make predictions using the fitted pipeline\n", + "- Evaluate how good the predictions are" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 0. Setup the logging\n", + "\n", + "This step sets up logging in our environment to increase our visibility over\n", + "the steps that Draco performs." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import logging;\n", + "\n", + "logging.basicConfig(level=logging.INFO)\n", + "logging.getLogger().setLevel(level=logging.INFO)\n", + "\n", + "import warnings\n", + "warnings.simplefilter(\"ignore\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Load the Data\n", + "\n", + "The first step is to load the data that we are going to use.\n", + "\n", + "In order to use the demo data included in Draco, the `draco.demo.load_demo` function can be used." + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from draco.demo import load_demo\n", + "\n", + "train_target_times, test_target_times, readings = load_demo(name='rul')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This will download some demo data from [Draco S3 demo Bucket](\n", + "https://d3-ai-draco.s3.amazonaws.com/index.html)%20and%20load%20it%20as/n", + "the necessary `target_times` and `readings` tables.\n", + "\n", + "The exact format of these tables is described in the Draco README and docs:" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idcutoff_timetarget
012013-01-12 04:20:00166
112013-01-12 04:30:00165
212013-01-12 04:40:00164
312013-01-12 04:50:00163
412013-01-12 05:00:00162
\n", + "
" + ], + "text/plain": [ + " turbine_id cutoff_time target\n", + "0 1 2013-01-12 04:20:00 166\n", + "1 1 2013-01-12 04:30:00 165\n", + "2 1 2013-01-12 04:40:00 164\n", + "3 1 2013-01-12 04:50:00 163\n", + "4 1 2013-01-12 05:00:00 162" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_target_times.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(18131, 3)" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_target_times.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "turbine_id int64\n", + "cutoff_time datetime64[ns]\n", + "target int64\n", + "dtype: object" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_target_times.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idcutoff_timetarget
012013-01-13 13:10:00112.0
122013-01-14 08:00:0098.0
232013-01-14 02:50:0069.0
342013-01-14 01:10:0082.0
452013-01-14 13:10:0091.0
\n", + "
" + ], + "text/plain": [ + " turbine_id cutoff_time target\n", + "0 1 2013-01-13 13:10:00 112.0\n", + "1 2 2013-01-14 08:00:00 98.0\n", + "2 3 2013-01-14 02:50:00 69.0\n", + "3 4 2013-01-14 01:10:00 82.0\n", + "4 5 2013-01-14 13:10:00 91.0" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_target_times.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(100, 3)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_target_times.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "turbine_id int64\n", + "cutoff_time datetime64[ns]\n", + "target float64\n", + "dtype: object" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_target_times.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idtimestampsignal_idvalue
012013-01-12 00:10:00operational setting 1-0.0007
112013-01-12 00:20:00operational setting 10.0019
212013-01-12 00:30:00operational setting 1-0.0043
312013-01-12 00:40:00operational setting 10.0007
412013-01-12 00:50:00operational setting 1-0.0019
\n", + "
" + ], + "text/plain": [ + " turbine_id timestamp signal_id value\n", + "0 1 2013-01-12 00:10:00 operational setting 1 -0.0007\n", + "1 1 2013-01-12 00:20:00 operational setting 1 0.0019\n", + "2 1 2013-01-12 00:30:00 operational setting 1 -0.0043\n", + "3 1 2013-01-12 00:40:00 operational setting 1 0.0007\n", + "4 1 2013-01-12 00:50:00 operational setting 1 -0.0019" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "readings.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(809448, 4)" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "readings.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "turbine_id int64\n", + "timestamp datetime64[ns]\n", + "signal_id object\n", + "value float64\n", + "dtype: object" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "readings.dtypes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Load your own Dataset" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Alternatively, if you want to load your own dataset, all you have to do is load the\n", + "`target_times` and `readings` tables as `pandas.DataFrame` objects.\n", + "\n", + "Make sure to parse the corresponding datetime fields!\n", + "\n", + "```python\n", + "import pandas as pd\n", + "\n", + "target_times = pd.read_csv('path/to/your/target_times.csv', parse_dates=['cutoff_time'])\n", + "readings = pd.read_csv('path/to/your/readings.csv', parse_dates=['timestamp'])\n", + "```" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Finding the available Pipelines\n", + "\n", + "The next step will be to select a collection of templates from the ones\n", + "available in Draco.\n", + "\n", + "For this, we can use the `draco.get_pipelines` function, which will\n", + "return us the list of all the available MLBlocks pipelines found in the\n", + "Draco system." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['dfs_xgb_prob_with_unstack',\n", + " 'dfs_xgb_with_normalization',\n", + " 'dfs_xgb',\n", + " 'dfs_xgb_with_unstack',\n", + " 'dfs_xgb_prob_with_unstack_normalization',\n", + " 'dfs_xgb_with_unstack_normalization',\n", + " 'dfs_xgb_prob_with_double_normalization',\n", + " 'dfs_xgb_with_double_normalization',\n", + " 'lstm_regressor_with_unstack',\n", + " 'lstm_regressor',\n", + " 'double_lstm_prob_with_unstack',\n", + " 'double_lstm_prob',\n", + " 'double_lstm',\n", + " 'double_lstm_with_unstack',\n", + " 'lstm_prob_with_unstack',\n", + " 'lstm_with_unstack',\n", + " 'lstm_prob',\n", + " 'lstm']" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from draco import get_pipelines\n", + "\n", + "get_pipelines()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Optionally, we can pass a string to select the pipelines that contain it:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['lstm_regressor_with_unstack', 'lstm_regressor']" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "get_pipelines('regressor')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We will use the regression pipeline `lstm_regressor_with_unstack`\n", + "\n", + "The `lstm_regressor_with_unstack` pipeline contains the following steps:\n", + "\n", + "- Resample the data using a 10 minute average aggregation\n", + "- Unstack the data by signal, so each signal is in a different column\n", + "- Impute missing values in the readings table\n", + "- Normalize (scale) the data between [-1, 1].\n", + "- Create window sequences using target times.\n", + "- Apply an LSTM Regressor" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_name = 'lstm_regressor_with_unstack'" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Fitting a Draco Pipeline\n", + "\n", + "Once we have loaded the data, we create a **DracoPipeline** instance by passing `pipeline_name` which is the name of a pipeline, the path to a template json file, or a list that can combine both of them." + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [], + "source": [ + "from draco.pipeline import DracoPipeline\n", + "\n", + "pipeline = DracoPipeline(pipeline_name)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To train a pipeline we use the `fit` method passing the `target_times` and the `readings` table:" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2022-02-01 15:05:13.365367: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2022-02-01 15:05:13.379993: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fe6a0ec50a0 initialized for platform Host (this does not guarantee that XLA will be used). 
Devices:\n", + "2022-02-01 15:05:13.380010: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version\n" + ] + } + ], + "source": [ + "pipeline.fit(train_target_times, readings)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 4. Use the fitted pipeline\n", + "\n", + "After fitting the pipeline, we are ready to make predictions on new data:" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "predictions = pipeline.predict(test_target_times, readings)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And evaluate its prediction performance:" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.6362969806460871" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.metrics import r2_score\n", + "\n", + "r2_score(test_target_times['target'], predictions)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Save and load the pipeline\n", + "\n", + "Since the tuning and fitting process takes time to execute and requires a lot of data, you\n", + "will probably want to save a fitted instance and load it later to analyze new signals\n", + "instead of fitting pipelines over and over again.\n", + "\n", + "This can be done by using the `save` and `load` methods from the `DracoPipeline`.\n", + "\n", + "In order to save an instance, call its `save` method passing it the path and filename\n", + "where the model should be saved." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "path = 'my_pipeline.pkl'\n", + "\n", + "pipeline.save(path)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once the pipeline is saved, it can be loaded back as a new `DracoPipeline` by using the\n", + "`DracoPipeline.load` method:" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [], + "source": [ + "new_pipeline = DracoPipeline.load(path)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Once loaded, it can be directly used to make predictions on new data." 
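+ "\n",
+ "For quick reference, the following is a minimal sketch consolidating the workflow covered in this tutorial. It is not part of the notebook's executed cells; it assumes Draco is installed, that the demo RUL dataset is reachable, and it only uses the `DracoPipeline` methods demonstrated above:\n",
+ "\n",
+ "```python\n",
+ "from draco.demo import load_demo\n",
+ "from draco.pipeline import DracoPipeline\n",
+ "\n",
+ "# Load the demo RUL dataset (downloaded and cached on first call).\n",
+ "train_target_times, test_target_times, readings = load_demo(name='rul')\n",
+ "\n",
+ "# Build and fit the regression pipeline used in this tutorial.\n",
+ "pipeline = DracoPipeline('lstm_regressor_with_unstack')\n",
+ "pipeline.fit(train_target_times, readings)\n",
+ "\n",
+ "# Persist the fitted pipeline and restore it later.\n",
+ "pipeline.save('my_pipeline.pkl')\n",
+ "new_pipeline = DracoPipeline.load('my_pipeline.pkl')\n",
+ "\n",
+ "# The restored pipeline predicts directly on new data.\n",
+ "predictions = new_pipeline.predict(test_target_times, readings)\n",
+ "```"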
+ ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[129.89064 ],\n", + " [139.89001 ],\n", + " [ 39.425865],\n", + " [110.67838 ],\n", + " [ 98.52903 ]], dtype=float32)" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "predictions = new_pipeline.predict(test_target_times, readings)\n", + "predictions[0:5]" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorials/Convert NASA CMAPSS to Draco Format.ipynb b/tutorials/Convert NASA CMAPSS to Draco Format.ipynb new file mode 100644 index 0000000..bf5af31 --- /dev/null +++ b/tutorials/Convert NASA CMAPSS to Draco Format.ipynb @@ -0,0 +1,406 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "2f3d8acf", + "metadata": {}, + "source": [ + "# Convert CMAPSS to Draco Format\n", + "\n", + "In this notebook we download [CMAPSS](https://ti.arc.nasa.gov/tech/dash/groups/pcoe/prognostic-data-repository/#turbofan) data and reformat it as Draco pipelines expect." + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "f39b805c", + "metadata": {}, + "outputs": [], + "source": [ + "import datetime\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "import matplotlib.pyplot as plt" + ] + }, + { + "cell_type": "markdown", + "id": "626a2da0", + "metadata": {}, + "source": [ + "## 1. Download Data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ff641cff", + "metadata": {}, + "outputs": [], + "source": [ + "import io\n", + "import os\n", + "import urllib\n", + "import zipfile\n", + "\n", + "DATA_URL = 'https://d3-ai-greenguard.s3.amazonaws.com/CMAPSSData.zip'\n", + "\n", + "response = urllib.request.urlopen(DATA_URL)\n", + "bytes_io = io.BytesIO(response.read())\n", + "\n", + "with zipfile.ZipFile(bytes_io) as zf:\n", + " zf.extractall('CMAPSSData')" + ] + }, + { + "cell_type": "markdown", + "id": "9c435699", + "metadata": {}, + "source": [ + "## 2. Read Data" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "1bb002ac", + "metadata": {}, + "outputs": [], + "source": [ + "# columns\n", + "\n", + "index = ['unit number', 'time, in cycles']\n", + "setting = ['operational setting {}'.format(i + 1) for i in range(0, 3)]\n", + "sensor = ['sensor measurement {}'.format(i + 1) for i in range(0, 21)]\n", + "\n", + "all_columns = index + setting + sensor" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "74478b0f", + "metadata": {}, + "outputs": [], + "source": [ + "train = pd.read_csv('CMAPSSData/train_FD001.txt', sep=' ', header=None)\n", + "train = train.dropna(axis=1)\n", + "train.columns = all_columns\n", + "\n", + "test = pd.read_csv('CMAPSSData/test_FD001.txt', sep=' ', header=None)\n", + "test = test.dropna(axis=1)\n", + "test.columns = all_columns\n", + "\n", + "y_test = pd.read_csv('CMAPSSData/RUL_FD001.txt', sep=' ', header=None)\n", + "y_test = y_test.dropna(axis=1)" + ] + }, + { + "cell_type": "markdown", + "id": "dd480185", + "metadata": {}, + "source": [ + "## 3. 
Create columns\n", + "\n", + "### 3.a create `RUL` column\n", + "How do we create **Remaining Useful Life (RUL)** column for the training dataset? We can assume that the last entry in the training dataset is the maximum life expectancy for that unit. Then each cycle we have will decrease by that number." + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "eb0270ba", + "metadata": {}, + "outputs": [], + "source": [ + "def get_max(x):\n", + " return cycles_max[x]\n", + "\n", + "cycles_max = train.groupby(\"unit number\")[\"time, in cycles\"].max().to_dict()\n", + "cycles_max = train['unit number'].apply(get_max)\n", + "\n", + "train['RUL'] = cycles_max - train[\"time, in cycles\"]" + ] + }, + { + "cell_type": "markdown", + "id": "57fbd3b9", + "metadata": {}, + "source": [ + "### 3.b create `cutoff_time` column\n", + "`cutoff_time` is a datetime column with relation to the `cycle` number. We pick a start date and start incrementing from there." + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "3e320356", + "metadata": {}, + "outputs": [], + "source": [ + "def get_timestamp(x):\n", + " return start + datetime.timedelta(minutes=x * 10)\n", + "\n", + "start = datetime.datetime(2013, 1, 12)\n", + "train['timestamp'] = train['time, in cycles'].apply(get_timestamp)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "11f78b71", + "metadata": {}, + "outputs": [], + "source": [ + "def get_timestamp_test(x):\n", + " return last[x['unit number']] + datetime.timedelta(minutes=x['time, in cycles'] * 10)\n", + "\n", + "last = train.groupby('unit number').last()['timestamp'].to_dict()\n", + "test['timestamp'] = test.apply(get_timestamp_test, axis=1)" + ] + }, + { + "cell_type": "markdown", + "id": "95bec88f", + "metadata": {}, + "source": [ + "### 4. Format Data\n", + "\n", + "make `label_times` have three columns, namely: `['turbine_id', 'cutoff_time', 'target']`." + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "1ce4320e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idcutoff_timetarget
2512013-01-12 04:20:00166
2612013-01-12 04:30:00165
2712013-01-12 04:40:00164
2812013-01-12 04:50:00163
2912013-01-12 05:00:00162
\n", + "
" + ], + "text/plain": [ + " turbine_id cutoff_time target\n", + "25 1 2013-01-12 04:20:00 166\n", + "26 1 2013-01-12 04:30:00 165\n", + "27 1 2013-01-12 04:40:00 164\n", + "28 1 2013-01-12 04:50:00 163\n", + "29 1 2013-01-12 05:00:00 162" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_label_times = train[['unit number', 'timestamp', 'RUL']].copy()\n", + "train_label_times.columns = ['turbine_id', 'cutoff_time', 'target']\n", + "\n", + "# drop first 24 occurances\n", + "train_label_times = train_label_times[train_label_times.groupby('turbine_id').cumcount('turbine_id') > 24]\n", + "train_label_times.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "f320e753", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idcutoff_timetarget
012013-01-13 13:10:00112.0
122013-01-14 08:00:0098.0
232013-01-14 02:50:0069.0
342013-01-14 01:10:0082.0
452013-01-14 13:10:0091.0
\n", + "
" + ], + "text/plain": [ + " turbine_id cutoff_time target\n", + "0 1 2013-01-13 13:10:00 112.0\n", + "1 2 2013-01-14 08:00:00 98.0\n", + "2 3 2013-01-14 02:50:00 69.0\n", + "3 4 2013-01-14 01:10:00 82.0\n", + "4 5 2013-01-14 13:10:00 91.0" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_label_times = test[['unit number', 'timestamp']].groupby('unit number').last().reset_index()\n", + "test_label_times.columns = ['turbine_id', 'cutoff_time']\n", + "test_label_times['target'] = np.array(y_test).astype('float32')\n", + "test_label_times.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "50be8dc4", + "metadata": {}, + "outputs": [], + "source": [ + "reading_columns = ['unit number', 'timestamp'] + setting + sensor\n", + "readings = pd.concat([train, test])[reading_columns]\n", + "readings = readings.melt(id_vars=['unit number', 'timestamp'])\n", + "readings.columns = ['turbine_id', 'timestamp', 'signal_id', 'value']" + ] + }, + { + "cell_type": "markdown", + "id": "01a77e60", + "metadata": {}, + "source": [ + "## 5. Save Data" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "5f622ff7", + "metadata": {}, + "outputs": [], + "source": [ + "readings.to_csv('rul_readings.csv.gz', compression='gzip', index=False)\n", + "train_label_times.to_csv('rul_train_target_times.csv.gz', compression='gzip', index=False)\n", + "test_label_times.to_csv('rul_test_target_times.csv.gz', compression='gzip', index=False)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorials/pipelines/normalize_dfs_xgb_classifier.ipynb b/tutorials/pipelines/dfs_xgb_with_double_normalization.ipynb similarity index 58% rename from tutorials/pipelines/normalize_dfs_xgb_classifier.ipynb rename to tutorials/pipelines/dfs_xgb_with_double_normalization.ipynb index ca00d58..6fd5f1e 100644 --- a/tutorials/pipelines/normalize_dfs_xgb_classifier.ipynb +++ b/tutorials/pipelines/dfs_xgb_with_double_normalization.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# normalize_dfs_xgb_classifier" + "# dfs_xgb_with_double_normalization" ] }, { @@ -24,14 +24,23 @@ "metadata": {}, "outputs": [], "source": [ - "pipeline_name = 'classes.normalize_dfs_xgb_classifier'" + "pipeline_name = 'dfs_xgb_with_double_normalization'" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/sarah/opt/anaconda3/envs/draco/lib/python3.7/site-packages/dask/dataframe/utils.py:14: FutureWarning: pandas.util.testing is deprecated. 
Use the functions in the public API at pandas.testing instead.\n", + " import pandas.util.testing as tm\n" + ] + } + ], "source": [ "from draco.pipeline import DracoPipeline\n", "\n", @@ -46,10 +55,7 @@ { "data": { "text/plain": [ - "['pandas.DataFrame.resample',\n", - " 'featuretools.EntitySet.entity_from_dataframe',\n", - " 'featuretools.EntitySet.normalize_entity',\n", - " 'featuretools.EntitySet.normalize_entity',\n", + "['mlblocks.MLPipeline',\n", " 'featuretools.dfs',\n", " 'mlprimitives.custom.feature_extraction.CategoricalEncoder',\n", " 'xgboost.XGBClassifier']" @@ -267,360 +273,28 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## pandas.DataFrame.resample\n", + "## mlblocks.MLPipeline\n", + "\n", + "### pandas.DataFrame.resample\n", "\n", "* Input: readings\n", "* Output: readings (resampled)\n", "* Effect: readings have been resampled to the indicated resample rule and turbine_id,\n", - " signal_id and timestamp have been set as a multi-index" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "step = 0\n", - "context = pipeline.fit(target_times, readings, output_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'X', 'y'])" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
turbine_idsignal_idtimestampvalue
0T001S012013-01-10 00:00:00323.0
1T001S012013-01-10 00:10:00346.0
2T001S012013-01-10 00:20:00407.0
3T001S012013-01-10 00:30:00257.0
4T001S012013-01-10 00:40:00267.0
\n", - "
" - ], - "text/plain": [ - " turbine_id signal_id timestamp value\n", - "0 T001 S01 2013-01-10 00:00:00 323.0\n", - "1 T001 S01 2013-01-10 00:10:00 346.0\n", - "2 T001 S01 2013-01-10 00:20:00 407.0\n", - "3 T001 S01 2013-01-10 00:30:00 257.0\n", - "4 T001 S01 2013-01-10 00:40:00 267.0" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['readings'].head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## featuretools.EntitySet.entity_from_dataframe\n", + " signal_id and timestamp have been set as a multi-index\n", + "\n", + "### featuretools.EntitySet.entity_from_dataframe\n", "\n", "* Input: readings (resampled)\n", "* Output: entityset\n", - "* Effect: Entityset has been generated from readings" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "step = 1\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'X', 'y', 'entityset'])" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Entityset: entityset\n", - " Entities:\n", - " readings [Rows: 1329146, Columns: 5]\n", - " Relationships:\n", - " No relationships" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['entityset']" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
turbine_idsignal_idtimestampvalue
0T001S012013-01-10 00:00:00323.0
1T001S012013-01-10 00:10:00346.0
2T001S012013-01-10 00:20:00407.0
3T001S012013-01-10 00:30:00257.0
4T001S012013-01-10 00:40:00267.0
\n", - "
" - ], - "text/plain": [ - " turbine_id signal_id timestamp value\n", - "0 T001 S01 2013-01-10 00:00:00 323.0\n", - "1 T001 S01 2013-01-10 00:10:00 346.0\n", - "2 T001 S01 2013-01-10 00:20:00 407.0\n", - "3 T001 S01 2013-01-10 00:30:00 257.0\n", - "4 T001 S01 2013-01-10 00:40:00 267.0" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['readings'].head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## featuretools.EntitySet.normalize_entity\n", + "* Effect: Entityset has been generated from readings\n", + "\n", + "### featuretools.EntitySet.normalize_entity\n", "\n", "* Input: entityset\n", "* Output: entityset with relationship (readings.turbine_id with turbines.turbine_id)\n", - "* Effect: establish relation between readings and turbines" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [], - "source": [ - "step = 2\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'entityset', 'X', 'y'])" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Entityset: entityset\n", - " Entities:\n", - " readings [Rows: 1329146, Columns: 5]\n", - " turbines [Rows: 1, Columns: 1]\n", - " Relationships:\n", - " readings.turbine_id -> turbines.turbine_id" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['entityset']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## featuretools.EntitySet.normalize_entity\n", + "* Effect: establish relation between readings and turbines\n", + "\n", + "### featuretools.EntitySet.normalize_entity\n", "\n", "* Input: entityset\n", "* Output: entityset with relationship (readings.signal_id with signals.signal_id)\n", @@ -629,26 +303,26 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ - "step = 3\n", - "context = pipeline.fit(**context, output_=step, start_=step)" + "step = 0\n", + "context = pipeline.fit(target_times, readings, output_=step)" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "dict_keys(['readings', 'turbines', 'entityset', 'X', 'y'])" + "dict_keys(['readings', 'turbines', 'X', 'y', 'entityset'])" ] }, - "execution_count": 18, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -659,7 +333,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -675,7 +349,7 @@ " readings.signal_id -> signals.signal_id" ] }, - "execution_count": 19, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -697,17 +371,17 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ - "step = 4\n", + "step = 1\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -716,7 +390,7 @@ "dict_keys(['readings', 'turbines', 
'entityset', 'X', 'y'])" ] }, - "execution_count": 21, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -727,7 +401,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -751,27 +425,27 @@ " \n", " \n", " \n", - " COUNT(readings)\n", + " SUM(readings.value)\n", + " STD(readings.value)\n", " MAX(readings.value)\n", - " MEAN(readings.value)\n", + " SKEW(readings.value)\n", " MIN(readings.value)\n", - " MODE(readings.signal_id)\n", + " MEAN(readings.value)\n", + " COUNT(readings)\n", " NUM_UNIQUE(readings.signal_id)\n", - " SKEW(readings.value)\n", - " STD(readings.value)\n", - " SUM(readings.value)\n", - " MODE(readings.DAY(timestamp))\n", + " MODE(readings.signal_id)\n", + " NUM_UNIQUE(readings.DAY(timestamp))\n", " ...\n", - " SKEW(readings.signals.NUM_UNIQUE(readings.WEEKDAY(timestamp)))\n", - " SKEW(readings.signals.NUM_UNIQUE(readings.YEAR(timestamp)))\n", - " STD(readings.signals.NUM_UNIQUE(readings.DAY(timestamp)))\n", - " STD(readings.signals.NUM_UNIQUE(readings.MONTH(timestamp)))\n", - " STD(readings.signals.NUM_UNIQUE(readings.WEEKDAY(timestamp)))\n", - " STD(readings.signals.NUM_UNIQUE(readings.YEAR(timestamp)))\n", - " SUM(readings.signals.NUM_UNIQUE(readings.DAY(timestamp)))\n", - " SUM(readings.signals.NUM_UNIQUE(readings.MONTH(timestamp)))\n", - " SUM(readings.signals.NUM_UNIQUE(readings.WEEKDAY(timestamp)))\n", - " SUM(readings.signals.NUM_UNIQUE(readings.YEAR(timestamp)))\n", + " MEAN(readings.signals.NUM_UNIQUE(readings.WEEKDAY(timestamp)))\n", + " MEAN(readings.signals.NUM_UNIQUE(readings.YEAR(timestamp)))\n", + " NUM_UNIQUE(readings.signals.MODE(readings.DAY(timestamp)))\n", + " NUM_UNIQUE(readings.signals.MODE(readings.YEAR(timestamp)))\n", + " NUM_UNIQUE(readings.signals.MODE(readings.WEEKDAY(timestamp)))\n", + " NUM_UNIQUE(readings.signals.MODE(readings.MONTH(timestamp)))\n", + " MODE(readings.signals.MODE(readings.DAY(timestamp)))\n", + " MODE(readings.signals.MODE(readings.YEAR(timestamp)))\n", + " MODE(readings.signals.MODE(readings.WEEKDAY(timestamp)))\n", + " MODE(readings.signals.MODE(readings.MONTH(timestamp)))\n", " \n", " \n", " turbine_id\n", @@ -801,123 +475,123 @@ " \n", " \n", " T001\n", - " 3744\n", + " 3.433649e+09\n", + " 1.456860e+06\n", " 3448719.0\n", - " 917107.079193\n", - " 0.0\n", - " S01\n", - " 26\n", " 1.019214\n", - " 1.456860e+06\n", - " 3.433649e+09\n", - " 11\n", - " ...\n", - " 0\n", - " 0\n", - " 0.0\n", " 0.0\n", - " 0.0\n", - " 0.0\n", - " 7488\n", - " 3744\n", - " 7488\n", + " 917107.079193\n", " 3744\n", + " 26\n", + " S01\n", + " 2\n", + " ...\n", + " 2.0\n", + " 1.0\n", + " 1\n", + " 1\n", + " 1\n", + " 1\n", + " 11\n", + " 2013\n", + " 4\n", + " 1\n", " \n", " \n", " T001\n", - " 3744\n", + " 3.441489e+09\n", + " 1.459865e+06\n", " 3453777.0\n", - " 919201.162179\n", - " 0.0\n", - " S01\n", - " 26\n", " 1.018761\n", - " 1.459865e+06\n", - " 3.441489e+09\n", - " 12\n", - " ...\n", - " 0\n", - " 0\n", - " 0.0\n", - " 0.0\n", " 0.0\n", - " 0.0\n", - " 7488\n", - " 3744\n", - " 7488\n", - " 3744\n", - " \n", - " \n", - " T001\n", + " 919201.162179\n", " 3744\n", - " 3463880.0\n", - " 922935.352244\n", - " 2.7\n", - " S01\n", " 26\n", - " 1.018192\n", - " 1.465277e+06\n", - " 3.455470e+09\n", - " 13\n", + " S01\n", + " 2\n", " ...\n", - " 0\n", - " 0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 7488\n", - " 3744\n", - " 7488\n", - " 3744\n", + " 2.0\n", + " 1.0\n", + " 1\n", + " 1\n", + " 1\n", + " 1\n", + " 12\n", + " 2013\n", + " 
5\n", + " 1\n", " \n", " \n", " T001\n", + " 3.455470e+09\n", + " 1.465277e+06\n", + " 3463880.0\n", + " 1.018192\n", + " 2.7\n", + " 922935.352244\n", " 3744\n", - " 3474703.0\n", - " 928248.092869\n", - " -1.0\n", - " S01\n", " 26\n", - " 1.017666\n", - " 1.473337e+06\n", - " 3.475361e+09\n", - " 14\n", + " S01\n", + " 2\n", " ...\n", - " 0\n", - " 0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 7488\n", - " 3744\n", - " 7488\n", - " 3744\n", + " 2.0\n", + " 1.0\n", + " 1\n", + " 1\n", + " 1\n", + " 1\n", + " 13\n", + " 2013\n", + " 6\n", + " 1\n", " \n", " \n", " T001\n", + " 3.475361e+09\n", + " 1.473337e+06\n", + " 3474703.0\n", + " 1.017666\n", + " -1.0\n", + " 928248.092869\n", " 3744\n", - " 3485019.0\n", - " 924186.531200\n", - " 0.0\n", - " S01\n", " 26\n", - " 1.032002\n", - " 1.477958e+06\n", - " 2.888083e+09\n", - " 15\n", + " S01\n", + " 2\n", " ...\n", + " 2.0\n", + " 1.0\n", + " 1\n", + " 1\n", + " 1\n", + " 1\n", + " 14\n", + " 2013\n", " 0\n", - " 0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", + " 1\n", + " \n", + " \n", + " T001\n", + " 2.888083e+09\n", + " 1.477958e+06\n", + " 3485019.0\n", + " 1.032002\n", " 0.0\n", - " 7488\n", - " 3744\n", - " 7488\n", + " 924186.531200\n", " 3744\n", + " 26\n", + " S01\n", + " 2\n", + " ...\n", + " 2.0\n", + " 1.0\n", + " 1\n", + " 1\n", + " 1\n", + " 1\n", + " 15\n", + " 2013\n", + " 1\n", + " 1\n", " \n", " \n", "\n", @@ -925,130 +599,122 @@ "" ], "text/plain": [ - " COUNT(readings) MAX(readings.value) MEAN(readings.value) \\\n", - "turbine_id \n", - "T001 3744 3448719.0 917107.079193 \n", - "T001 3744 3453777.0 919201.162179 \n", - "T001 3744 3463880.0 922935.352244 \n", - "T001 3744 3474703.0 928248.092869 \n", - "T001 3744 3485019.0 924186.531200 \n", - "\n", - " MIN(readings.value) MODE(readings.signal_id) \\\n", - "turbine_id \n", - "T001 0.0 S01 \n", - "T001 0.0 S01 \n", - "T001 2.7 S01 \n", - "T001 -1.0 S01 \n", - "T001 0.0 S01 \n", + " SUM(readings.value) STD(readings.value) MAX(readings.value) \\\n", + "turbine_id \n", + "T001 3.433649e+09 1.456860e+06 3448719.0 \n", + "T001 3.441489e+09 1.459865e+06 3453777.0 \n", + "T001 3.455470e+09 1.465277e+06 3463880.0 \n", + "T001 3.475361e+09 1.473337e+06 3474703.0 \n", + "T001 2.888083e+09 1.477958e+06 3485019.0 \n", "\n", - " NUM_UNIQUE(readings.signal_id) SKEW(readings.value) \\\n", - "turbine_id \n", - "T001 26 1.019214 \n", - "T001 26 1.018761 \n", - "T001 26 1.018192 \n", - "T001 26 1.017666 \n", - "T001 26 1.032002 \n", + " SKEW(readings.value) MIN(readings.value) MEAN(readings.value) \\\n", + "turbine_id \n", + "T001 1.019214 0.0 917107.079193 \n", + "T001 1.018761 0.0 919201.162179 \n", + "T001 1.018192 2.7 922935.352244 \n", + "T001 1.017666 -1.0 928248.092869 \n", + "T001 1.032002 0.0 924186.531200 \n", "\n", - " STD(readings.value) SUM(readings.value) \\\n", - "turbine_id \n", - "T001 1.456860e+06 3.433649e+09 \n", - "T001 1.459865e+06 3.441489e+09 \n", - "T001 1.465277e+06 3.455470e+09 \n", - "T001 1.473337e+06 3.475361e+09 \n", - "T001 1.477958e+06 2.888083e+09 \n", + " COUNT(readings) NUM_UNIQUE(readings.signal_id) \\\n", + "turbine_id \n", + "T001 3744 26 \n", + "T001 3744 26 \n", + "T001 3744 26 \n", + "T001 3744 26 \n", + "T001 3744 26 \n", "\n", - " MODE(readings.DAY(timestamp)) ... \\\n", - "turbine_id ... \n", - "T001 11 ... \n", - "T001 12 ... \n", - "T001 13 ... \n", - "T001 14 ... \n", - "T001 15 ... \n", + " MODE(readings.signal_id) NUM_UNIQUE(readings.DAY(timestamp)) ... \\\n", + "turbine_id ... \n", + "T001 S01 2 ... \n", + "T001 S01 2 ... 
\n", + "T001 S01 2 ... \n", + "T001 S01 2 ... \n", + "T001 S01 2 ... \n", "\n", - " SKEW(readings.signals.NUM_UNIQUE(readings.WEEKDAY(timestamp))) \\\n", + " MEAN(readings.signals.NUM_UNIQUE(readings.WEEKDAY(timestamp))) \\\n", "turbine_id \n", - "T001 0 \n", - "T001 0 \n", - "T001 0 \n", - "T001 0 \n", - "T001 0 \n", + "T001 2.0 \n", + "T001 2.0 \n", + "T001 2.0 \n", + "T001 2.0 \n", + "T001 2.0 \n", "\n", - " SKEW(readings.signals.NUM_UNIQUE(readings.YEAR(timestamp))) \\\n", + " MEAN(readings.signals.NUM_UNIQUE(readings.YEAR(timestamp))) \\\n", "turbine_id \n", - "T001 0 \n", - "T001 0 \n", - "T001 0 \n", - "T001 0 \n", - "T001 0 \n", + "T001 1.0 \n", + "T001 1.0 \n", + "T001 1.0 \n", + "T001 1.0 \n", + "T001 1.0 \n", "\n", - " STD(readings.signals.NUM_UNIQUE(readings.DAY(timestamp))) \\\n", - "turbine_id \n", - "T001 0.0 \n", - "T001 0.0 \n", - "T001 0.0 \n", - "T001 0.0 \n", - "T001 0.0 \n", + " NUM_UNIQUE(readings.signals.MODE(readings.DAY(timestamp))) \\\n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", "\n", - " STD(readings.signals.NUM_UNIQUE(readings.MONTH(timestamp))) \\\n", + " NUM_UNIQUE(readings.signals.MODE(readings.YEAR(timestamp))) \\\n", "turbine_id \n", - "T001 0.0 \n", - "T001 0.0 \n", - "T001 0.0 \n", - "T001 0.0 \n", - "T001 0.0 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", "\n", - " STD(readings.signals.NUM_UNIQUE(readings.WEEKDAY(timestamp))) \\\n", - "turbine_id \n", - "T001 0.0 \n", - "T001 0.0 \n", - "T001 0.0 \n", - "T001 0.0 \n", - "T001 0.0 \n", + " NUM_UNIQUE(readings.signals.MODE(readings.WEEKDAY(timestamp))) \\\n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", "\n", - " STD(readings.signals.NUM_UNIQUE(readings.YEAR(timestamp))) \\\n", - "turbine_id \n", - "T001 0.0 \n", - "T001 0.0 \n", - "T001 0.0 \n", - "T001 0.0 \n", - "T001 0.0 \n", + " NUM_UNIQUE(readings.signals.MODE(readings.MONTH(timestamp))) \\\n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", "\n", - " SUM(readings.signals.NUM_UNIQUE(readings.DAY(timestamp))) \\\n", - "turbine_id \n", - "T001 7488 \n", - "T001 7488 \n", - "T001 7488 \n", - "T001 7488 \n", - "T001 7488 \n", + " MODE(readings.signals.MODE(readings.DAY(timestamp))) \\\n", + "turbine_id \n", + "T001 11 \n", + "T001 12 \n", + "T001 13 \n", + "T001 14 \n", + "T001 15 \n", "\n", - " SUM(readings.signals.NUM_UNIQUE(readings.MONTH(timestamp))) \\\n", - "turbine_id \n", - "T001 3744 \n", - "T001 3744 \n", - "T001 3744 \n", - "T001 3744 \n", - "T001 3744 \n", + " MODE(readings.signals.MODE(readings.YEAR(timestamp))) \\\n", + "turbine_id \n", + "T001 2013 \n", + "T001 2013 \n", + "T001 2013 \n", + "T001 2013 \n", + "T001 2013 \n", "\n", - " SUM(readings.signals.NUM_UNIQUE(readings.WEEKDAY(timestamp))) \\\n", - "turbine_id \n", - "T001 7488 \n", - "T001 7488 \n", - "T001 7488 \n", - "T001 7488 \n", - "T001 7488 \n", + " MODE(readings.signals.MODE(readings.WEEKDAY(timestamp))) \\\n", + "turbine_id \n", + "T001 4 \n", + "T001 5 \n", + "T001 6 \n", + "T001 0 \n", + "T001 1 \n", "\n", - " SUM(readings.signals.NUM_UNIQUE(readings.YEAR(timestamp))) \n", - "turbine_id \n", - "T001 3744 \n", - "T001 3744 \n", - "T001 3744 \n", - "T001 3744 \n", - "T001 3744 \n", + " MODE(readings.signals.MODE(readings.MONTH(timestamp))) \n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", "\n", "[5 rows x 99 columns]" ] }, - "execution_count": 22, + 
"execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -1059,7 +725,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -1068,7 +734,7 @@ "99" ] }, - "execution_count": 23, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -1080,7 +746,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -1115,51 +781,51 @@ " 0\n", " T001\n", " S01\n", - " 2013-01-10 00:00:00\n", + " 2013-01-10\n", " 323.0\n", " \n", " \n", " 1\n", " T001\n", - " S01\n", - " 2013-01-10 00:10:00\n", - " 346.0\n", + " S02\n", + " 2013-01-10\n", + " 320.0\n", " \n", " \n", " 2\n", " T001\n", - " S01\n", - " 2013-01-10 00:20:00\n", - " 407.0\n", + " S03\n", + " 2013-01-10\n", + " 284.0\n", " \n", " \n", " 3\n", " T001\n", - " S01\n", - " 2013-01-10 00:30:00\n", - " 257.0\n", + " S04\n", + " 2013-01-10\n", + " 348.0\n", " \n", " \n", " 4\n", " T001\n", - " S01\n", - " 2013-01-10 00:40:00\n", - " 267.0\n", + " S05\n", + " 2013-01-10\n", + " 273.0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " turbine_id signal_id timestamp value\n", - "0 T001 S01 2013-01-10 00:00:00 323.0\n", - "1 T001 S01 2013-01-10 00:10:00 346.0\n", - "2 T001 S01 2013-01-10 00:20:00 407.0\n", - "3 T001 S01 2013-01-10 00:30:00 257.0\n", - "4 T001 S01 2013-01-10 00:40:00 267.0" + " turbine_id signal_id timestamp value\n", + "0 T001 S01 2013-01-10 323.0\n", + "1 T001 S02 2013-01-10 320.0\n", + "2 T001 S03 2013-01-10 284.0\n", + "3 T001 S04 2013-01-10 348.0\n", + "4 T001 S05 2013-01-10 273.0" ] }, - "execution_count": 24, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -1181,17 +847,17 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ - "step = 5\n", + "step = 2\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -1200,7 +866,7 @@ "dict_keys(['readings', 'turbines', 'entityset', 'X', 'y'])" ] }, - "execution_count": 26, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1211,7 +877,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -1235,25 +901,25 @@ " \n", " \n", " \n", - " COUNT(readings)\n", + " SUM(readings.value)\n", + " STD(readings.value)\n", " MAX(readings.value)\n", - " MEAN(readings.value)\n", + " SKEW(readings.value)\n", " MIN(readings.value)\n", + " MEAN(readings.value)\n", + " COUNT(readings)\n", " NUM_UNIQUE(readings.signal_id)\n", - " SKEW(readings.value)\n", - " STD(readings.value)\n", - " SUM(readings.value)\n", - " MODE(readings.DAY(timestamp))\n", - " MODE(readings.MONTH(timestamp))\n", + " NUM_UNIQUE(readings.DAY(timestamp))\n", + " NUM_UNIQUE(readings.MONTH(timestamp))\n", " ...\n", - " STD(readings.signals.NUM_UNIQUE(readings.DAY(timestamp)))\n", - " STD(readings.signals.NUM_UNIQUE(readings.MONTH(timestamp)))\n", - " STD(readings.signals.NUM_UNIQUE(readings.WEEKDAY(timestamp)))\n", - " STD(readings.signals.NUM_UNIQUE(readings.YEAR(timestamp)))\n", - " SUM(readings.signals.NUM_UNIQUE(readings.DAY(timestamp)))\n", - " SUM(readings.signals.NUM_UNIQUE(readings.MONTH(timestamp)))\n", - " SUM(readings.signals.NUM_UNIQUE(readings.WEEKDAY(timestamp)))\n", - " SUM(readings.signals.NUM_UNIQUE(readings.YEAR(timestamp)))\n", + " 
NUM_UNIQUE(readings.signals.MODE(readings.DAY(timestamp)))\n", + " NUM_UNIQUE(readings.signals.MODE(readings.YEAR(timestamp)))\n", + " NUM_UNIQUE(readings.signals.MODE(readings.WEEKDAY(timestamp)))\n", + " NUM_UNIQUE(readings.signals.MODE(readings.MONTH(timestamp)))\n", + " MODE(readings.signals.MODE(readings.DAY(timestamp)))\n", + " MODE(readings.signals.MODE(readings.YEAR(timestamp)))\n", + " MODE(readings.signals.MODE(readings.WEEKDAY(timestamp)))\n", + " MODE(readings.signals.MODE(readings.MONTH(timestamp)))\n", " MODE(readings.signal_id)=S01\n", " MODE(readings.signals.MODE(readings.turbine_id))=T001\n", " \n", @@ -1285,121 +951,121 @@ " \n", " \n", " T001\n", - " 3744\n", + " 3.433649e+09\n", + " 1.456860e+06\n", " 3448719.0\n", - " 917107.079193\n", + " 1.019214\n", " 0.0\n", + " 917107.079193\n", + " 3744\n", " 26\n", - " 1.019214\n", - " 1.456860e+06\n", - " 3.433649e+09\n", - " 11\n", + " 2\n", " 1\n", " ...\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 7488\n", - " 3744\n", - " 7488\n", - " 3744\n", + " 1\n", + " 1\n", + " 1\n", + " 1\n", + " 11\n", + " 2013\n", + " 4\n", + " 1\n", " 1\n", " 1\n", " \n", " \n", " T001\n", - " 3744\n", + " 3.441489e+09\n", + " 1.459865e+06\n", " 3453777.0\n", - " 919201.162179\n", + " 1.018761\n", " 0.0\n", + " 919201.162179\n", + " 3744\n", " 26\n", - " 1.018761\n", - " 1.459865e+06\n", - " 3.441489e+09\n", - " 12\n", + " 2\n", " 1\n", " ...\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 7488\n", - " 3744\n", - " 7488\n", - " 3744\n", + " 1\n", + " 1\n", + " 1\n", + " 1\n", + " 12\n", + " 2013\n", + " 5\n", + " 1\n", " 1\n", " 1\n", " \n", " \n", " T001\n", - " 3744\n", + " 3.455470e+09\n", + " 1.465277e+06\n", " 3463880.0\n", - " 922935.352244\n", + " 1.018192\n", " 2.7\n", + " 922935.352244\n", + " 3744\n", " 26\n", - " 1.018192\n", - " 1.465277e+06\n", - " 3.455470e+09\n", - " 13\n", + " 2\n", " 1\n", " ...\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 7488\n", - " 3744\n", - " 7488\n", - " 3744\n", + " 1\n", + " 1\n", + " 1\n", + " 1\n", + " 13\n", + " 2013\n", + " 6\n", + " 1\n", " 1\n", " 1\n", " \n", " \n", " T001\n", - " 3744\n", + " 3.475361e+09\n", + " 1.473337e+06\n", " 3474703.0\n", - " 928248.092869\n", + " 1.017666\n", " -1.0\n", + " 928248.092869\n", + " 3744\n", " 26\n", - " 1.017666\n", - " 1.473337e+06\n", - " 3.475361e+09\n", - " 14\n", + " 2\n", " 1\n", " ...\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 7488\n", - " 3744\n", - " 7488\n", - " 3744\n", + " 1\n", + " 1\n", + " 1\n", + " 1\n", + " 14\n", + " 2013\n", + " 0\n", + " 1\n", " 1\n", " 1\n", " \n", " \n", " T001\n", - " 3744\n", + " 2.888083e+09\n", + " 1.477958e+06\n", " 3485019.0\n", - " 924186.531200\n", + " 1.032002\n", " 0.0\n", + " 924186.531200\n", + " 3744\n", " 26\n", - " 1.032002\n", - " 1.477958e+06\n", - " 2.888083e+09\n", - " 15\n", + " 2\n", " 1\n", " ...\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 7488\n", - " 3744\n", - " 7488\n", - " 3744\n", + " 1\n", + " 1\n", + " 1\n", + " 1\n", + " 15\n", + " 2013\n", + " 1\n", + " 1\n", " 1\n", " 1\n", " \n", @@ -1409,101 +1075,109 @@ "" ], "text/plain": [ - " COUNT(readings) MAX(readings.value) MEAN(readings.value) \\\n", - "turbine_id \n", - "T001 3744 3448719.0 917107.079193 \n", - "T001 3744 3453777.0 919201.162179 \n", - "T001 3744 3463880.0 922935.352244 \n", - "T001 3744 3474703.0 928248.092869 \n", - "T001 3744 3485019.0 924186.531200 \n", + " SUM(readings.value) STD(readings.value) MAX(readings.value) \\\n", + "turbine_id \n", + "T001 3.433649e+09 
1.456860e+06 3448719.0 \n", + "T001 3.441489e+09 1.459865e+06 3453777.0 \n", + "T001 3.455470e+09 1.465277e+06 3463880.0 \n", + "T001 3.475361e+09 1.473337e+06 3474703.0 \n", + "T001 2.888083e+09 1.477958e+06 3485019.0 \n", "\n", - " MIN(readings.value) NUM_UNIQUE(readings.signal_id) \\\n", - "turbine_id \n", - "T001 0.0 26 \n", - "T001 0.0 26 \n", - "T001 2.7 26 \n", - "T001 -1.0 26 \n", - "T001 0.0 26 \n", + " SKEW(readings.value) MIN(readings.value) MEAN(readings.value) \\\n", + "turbine_id \n", + "T001 1.019214 0.0 917107.079193 \n", + "T001 1.018761 0.0 919201.162179 \n", + "T001 1.018192 2.7 922935.352244 \n", + "T001 1.017666 -1.0 928248.092869 \n", + "T001 1.032002 0.0 924186.531200 \n", "\n", - " SKEW(readings.value) STD(readings.value) SUM(readings.value) \\\n", - "turbine_id \n", - "T001 1.019214 1.456860e+06 3.433649e+09 \n", - "T001 1.018761 1.459865e+06 3.441489e+09 \n", - "T001 1.018192 1.465277e+06 3.455470e+09 \n", - "T001 1.017666 1.473337e+06 3.475361e+09 \n", - "T001 1.032002 1.477958e+06 2.888083e+09 \n", + " COUNT(readings) NUM_UNIQUE(readings.signal_id) \\\n", + "turbine_id \n", + "T001 3744 26 \n", + "T001 3744 26 \n", + "T001 3744 26 \n", + "T001 3744 26 \n", + "T001 3744 26 \n", "\n", - " MODE(readings.DAY(timestamp)) MODE(readings.MONTH(timestamp)) \\\n", - "turbine_id \n", - "T001 11 1 \n", - "T001 12 1 \n", - "T001 13 1 \n", - "T001 14 1 \n", - "T001 15 1 \n", + " NUM_UNIQUE(readings.DAY(timestamp)) \\\n", + "turbine_id \n", + "T001 2 \n", + "T001 2 \n", + "T001 2 \n", + "T001 2 \n", + "T001 2 \n", + "\n", + " NUM_UNIQUE(readings.MONTH(timestamp)) ... \\\n", + "turbine_id ... \n", + "T001 1 ... \n", + "T001 1 ... \n", + "T001 1 ... \n", + "T001 1 ... \n", + "T001 1 ... \n", "\n", - " ... STD(readings.signals.NUM_UNIQUE(readings.DAY(timestamp))) \\\n", - "turbine_id ... \n", - "T001 ... 0.0 \n", - "T001 ... 0.0 \n", - "T001 ... 0.0 \n", - "T001 ... 0.0 \n", - "T001 ... 
0.0 \n", + " NUM_UNIQUE(readings.signals.MODE(readings.DAY(timestamp))) \\\n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", "\n", - " STD(readings.signals.NUM_UNIQUE(readings.MONTH(timestamp))) \\\n", + " NUM_UNIQUE(readings.signals.MODE(readings.YEAR(timestamp))) \\\n", "turbine_id \n", - "T001 0.0 \n", - "T001 0.0 \n", - "T001 0.0 \n", - "T001 0.0 \n", - "T001 0.0 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", "\n", - " STD(readings.signals.NUM_UNIQUE(readings.WEEKDAY(timestamp))) \\\n", - "turbine_id \n", - "T001 0.0 \n", - "T001 0.0 \n", - "T001 0.0 \n", - "T001 0.0 \n", - "T001 0.0 \n", + " NUM_UNIQUE(readings.signals.MODE(readings.WEEKDAY(timestamp))) \\\n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", "\n", - " STD(readings.signals.NUM_UNIQUE(readings.YEAR(timestamp))) \\\n", - "turbine_id \n", - "T001 0.0 \n", - "T001 0.0 \n", - "T001 0.0 \n", - "T001 0.0 \n", - "T001 0.0 \n", + " NUM_UNIQUE(readings.signals.MODE(readings.MONTH(timestamp))) \\\n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", "\n", - " SUM(readings.signals.NUM_UNIQUE(readings.DAY(timestamp))) \\\n", - "turbine_id \n", - "T001 7488 \n", - "T001 7488 \n", - "T001 7488 \n", - "T001 7488 \n", - "T001 7488 \n", + " MODE(readings.signals.MODE(readings.DAY(timestamp))) \\\n", + "turbine_id \n", + "T001 11 \n", + "T001 12 \n", + "T001 13 \n", + "T001 14 \n", + "T001 15 \n", "\n", - " SUM(readings.signals.NUM_UNIQUE(readings.MONTH(timestamp))) \\\n", - "turbine_id \n", - "T001 3744 \n", - "T001 3744 \n", - "T001 3744 \n", - "T001 3744 \n", - "T001 3744 \n", + " MODE(readings.signals.MODE(readings.YEAR(timestamp))) \\\n", + "turbine_id \n", + "T001 2013 \n", + "T001 2013 \n", + "T001 2013 \n", + "T001 2013 \n", + "T001 2013 \n", "\n", - " SUM(readings.signals.NUM_UNIQUE(readings.WEEKDAY(timestamp))) \\\n", - "turbine_id \n", - "T001 7488 \n", - "T001 7488 \n", - "T001 7488 \n", - "T001 7488 \n", - "T001 7488 \n", + " MODE(readings.signals.MODE(readings.WEEKDAY(timestamp))) \\\n", + "turbine_id \n", + "T001 4 \n", + "T001 5 \n", + "T001 6 \n", + "T001 0 \n", + "T001 1 \n", "\n", - " SUM(readings.signals.NUM_UNIQUE(readings.YEAR(timestamp))) \\\n", - "turbine_id \n", - "T001 3744 \n", - "T001 3744 \n", - "T001 3744 \n", - "T001 3744 \n", - "T001 3744 \n", + " MODE(readings.signals.MODE(readings.MONTH(timestamp))) \\\n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", "\n", " MODE(readings.signal_id)=S01 \\\n", "turbine_id \n", @@ -1524,7 +1198,7 @@ "[5 rows x 99 columns]" ] }, - "execution_count": 27, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -1535,7 +1209,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -1570,51 +1244,51 @@ " 0\n", " T001\n", " S01\n", - " 2013-01-10 00:00:00\n", + " 2013-01-10\n", " 323.0\n", " \n", " \n", " 1\n", " T001\n", - " S01\n", - " 2013-01-10 00:10:00\n", - " 346.0\n", + " S02\n", + " 2013-01-10\n", + " 320.0\n", " \n", " \n", " 2\n", " T001\n", - " S01\n", - " 2013-01-10 00:20:00\n", - " 407.0\n", + " S03\n", + " 2013-01-10\n", + " 284.0\n", " \n", " \n", " 3\n", " T001\n", - " S01\n", - " 2013-01-10 00:30:00\n", - " 257.0\n", + " S04\n", + " 2013-01-10\n", + " 348.0\n", " \n", " \n", " 4\n", " T001\n", - " S01\n", - " 2013-01-10 00:40:00\n", - " 267.0\n", + " S05\n", + " 
2013-01-10\n", + " 273.0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " turbine_id signal_id timestamp value\n", - "0 T001 S01 2013-01-10 00:00:00 323.0\n", - "1 T001 S01 2013-01-10 00:10:00 346.0\n", - "2 T001 S01 2013-01-10 00:20:00 407.0\n", - "3 T001 S01 2013-01-10 00:30:00 257.0\n", - "4 T001 S01 2013-01-10 00:40:00 267.0" + " turbine_id signal_id timestamp value\n", + "0 T001 S01 2013-01-10 323.0\n", + "1 T001 S02 2013-01-10 320.0\n", + "2 T001 S03 2013-01-10 284.0\n", + "3 T001 S04 2013-01-10 348.0\n", + "4 T001 S05 2013-01-10 273.0" ] }, - "execution_count": 28, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -1636,17 +1310,17 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ - "step = 6\n", + "step = 3\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -1655,7 +1329,7 @@ "dict_keys(['readings', 'turbines', 'entityset', 'X', 'y'])" ] }, - "execution_count": 30, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -1667,7 +1341,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1681,7 +1355,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.12" + "version": "3.7.11" } }, "nbformat": 4, diff --git a/tutorials/pipelines/unstack_normalize_dfs_xgb_classifier.ipynb b/tutorials/pipelines/dfs_xgb_with_unstack_normalization.ipynb similarity index 68% rename from tutorials/pipelines/unstack_normalize_dfs_xgb_classifier.ipynb rename to tutorials/pipelines/dfs_xgb_with_unstack_normalization.ipynb index 84530a2..5731706 100644 --- a/tutorials/pipelines/unstack_normalize_dfs_xgb_classifier.ipynb +++ b/tutorials/pipelines/dfs_xgb_with_unstack_normalization.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# unstack_normalize_dfs_xgb_classifier" + "# dfs_xgb_with_unstack_normalization" ] }, { @@ -24,14 +24,23 @@ "metadata": {}, "outputs": [], "source": [ - "pipeline_name = 'classes.unstack_normalize_dfs_xgb_classifier'" + "pipeline_name = 'dfs_xgb_with_unstack_normalization'" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/sarah/opt/anaconda3/envs/draco/lib/python3.7/site-packages/dask/dataframe/utils.py:14: FutureWarning: pandas.util.testing is deprecated. 
Use the functions in the public API at pandas.testing instead.\n", + " import pandas.util.testing as tm\n" + ] + } + ], "source": [ "from draco.pipeline import DracoPipeline\n", "\n", @@ -46,10 +55,8 @@ { "data": { "text/plain": [ - "['pandas.DataFrame.resample',\n", - " 'pandas.DataFrame.unstack',\n", - " 'featuretools.EntitySet.entity_from_dataframe',\n", - " 'featuretools.EntitySet.normalize_entity',\n", + "['mlblocks.MLPipeline',\n", + " 'mlblocks.MLPipeline',\n", " 'featuretools.dfs',\n", " 'mlprimitives.custom.feature_extraction.CategoricalEncoder',\n", " 'xgboost.XGBClassifier']" @@ -267,12 +274,20 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## pandas.DataFrame.resample\n", + "## mlblocks.MLPipeline 1\n", + "\n", + "### pandas.DataFrame.resample\n", "\n", "* Input: readings\n", "* Output: readings (resampled)\n", "* Effect: readings have been resampled to the indicated resample rule and turbine_id,\n", - " signal_id and timestamp have been set as a multi-index" + " signal_id and timestamp have been set as a multi-index\n", + " \n", + "### pandas.DataFrame.unstack\n", + "\n", + "* Input: readings (resampled)\n", + "* Output: readings (unstacked)\n", + "* Effect: readings have been unstacked" ] }, { @@ -309,130 +324,6 @@ "cell_type": "code", "execution_count": 9, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
value
turbine_idsignal_idtimestamp
T001S012013-01-10 00:00:00323.0
2013-01-10 00:10:00346.0
2013-01-10 00:20:00407.0
2013-01-10 00:30:00257.0
2013-01-10 00:40:00267.0
\n", - "
" - ], - "text/plain": [ - " value\n", - "turbine_id signal_id timestamp \n", - "T001 S01 2013-01-10 00:00:00 323.0\n", - " 2013-01-10 00:10:00 346.0\n", - " 2013-01-10 00:20:00 407.0\n", - " 2013-01-10 00:30:00 257.0\n", - " 2013-01-10 00:40:00 267.0" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['readings'].head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## pandas.DataFrame.unstack\n", - "\n", - "* Input: readings (resampled)\n", - "* Output: readings (unstacked)\n", - "* Effect: readings have been unstacked" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "step = 1\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'X', 'y'])" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, "outputs": [ { "data": { @@ -636,7 +527,7 @@ "[5 rows x 28 columns]" ] }, - "execution_count": 12, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -649,72 +540,15 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## featuretools.EntitySet.entity_from_dataframe\n", + "## mlblocks.MLPipeline 2\n", + "\n", + "### featuretools.EntitySet.entity_from_dataframe\n", "\n", "* Input: readings (resampled)\n", "* Output: entityset\n", - "* Effect: Entityset has been generated from readings" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "step = 2\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'X', 'y', 'entityset'])" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Entityset: entityset\n", - " Entities:\n", - " readings [Rows: 51121, Columns: 29]\n", - " Relationships:\n", - " No relationships" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['entityset']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## featuretools.EntitySet.normalize_entity\n", + "* Effect: Entityset has been generated from readings\n", + "\n", + "### featuretools.EntitySet.normalize_entity\n", "\n", "* Input: entityset\n", "* Output: entityset with relationship (readings.turbine_id with turbines.turbine_id)\n", @@ -723,26 +557,26 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ - "step = 3\n", + "step = 1\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "dict_keys(['readings', 'turbines', 'entityset', 'X', 'y'])" + "dict_keys(['readings', 'turbines', 'X', 'y', 'entityset'])" ] }, - "execution_count": 17, + "execution_count": 11, "metadata": 
{}, "output_type": "execute_result" } @@ -753,7 +587,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -767,7 +601,7 @@ " readings.turbine_id -> turbines.turbine_id" ] }, - "execution_count": 18, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -789,17 +623,17 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ - "step = 4\n", + "step = 2\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -808,7 +642,7 @@ "dict_keys(['readings', 'turbines', 'entityset', 'X', 'y'])" ] }, - "execution_count": 20, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -819,7 +653,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -843,27 +677,27 @@ " \n", " \n", " \n", - " COUNT(readings)\n", - " MAX(readings.value_S01)\n", - " MAX(readings.value_S02)\n", - " MAX(readings.value_S03)\n", - " MAX(readings.value_S04)\n", - " MAX(readings.value_S05)\n", - " MAX(readings.value_S06)\n", - " MAX(readings.value_S07)\n", - " MAX(readings.value_S08)\n", - " MAX(readings.value_S09)\n", - " ...\n", + " SUM(readings.value_S14)\n", + " SUM(readings.value_S11)\n", " SUM(readings.value_S25)\n", - " SUM(readings.value_S26)\n", - " MODE(readings.DAY(timestamp))\n", - " MODE(readings.MONTH(timestamp))\n", - " MODE(readings.WEEKDAY(timestamp))\n", - " MODE(readings.YEAR(timestamp))\n", - " NUM_UNIQUE(readings.DAY(timestamp))\n", + " SUM(readings.value_S23)\n", + " SUM(readings.value_S17)\n", + " SUM(readings.value_S19)\n", + " SUM(readings.value_S04)\n", + " SUM(readings.value_S05)\n", + " SUM(readings.value_S21)\n", + " SUM(readings.value_S16)\n", + " ...\n", + " MEAN(readings.value_S20)\n", + " COUNT(readings)\n", + " NUM_UNIQUE(readings.YEAR(timestamp))\n", " NUM_UNIQUE(readings.MONTH(timestamp))\n", + " NUM_UNIQUE(readings.DAY(timestamp))\n", " NUM_UNIQUE(readings.WEEKDAY(timestamp))\n", - " NUM_UNIQUE(readings.YEAR(timestamp))\n", + " MODE(readings.YEAR(timestamp))\n", + " MODE(readings.MONTH(timestamp))\n", + " MODE(readings.DAY(timestamp))\n", + " MODE(readings.WEEKDAY(timestamp))\n", " \n", " \n", " turbine_id\n", @@ -893,122 +727,122 @@ " \n", " \n", " T001\n", - " 144\n", - " 369.0\n", - " 376.0\n", - " 378.0\n", - " 401.0\n", - " 317.0\n", - " 324.0\n", - " 301.0\n", - " 3209069.0\n", - " 706654.0\n", - " ...\n", + " 465421817.0\n", + " 496362516.0\n", " 2743.0\n", - " 20569.0\n", - " 11\n", + " 2780.0\n", + " 994.6\n", + " 3174.0\n", + " 19412.0\n", + " 17083.0\n", + " 3061.0\n", + " 550.4\n", + " ...\n", + " 22.326389\n", + " 144\n", " 1\n", - " 4\n", - " 2013\n", - " 2\n", " 1\n", " 2\n", + " 2\n", + " 2013\n", " 1\n", + " 11\n", + " 4\n", " \n", " \n", " T001\n", - " 144\n", - " 505.0\n", - " 426.0\n", - " 393.0\n", - " 517.0\n", - " 469.0\n", - " 407.0\n", - " 459.0\n", - " 3214181.0\n", - " 711718.0\n", - " ...\n", + " 465897578.0\n", + " 496952628.0\n", " 4237.0\n", - " 32991.0\n", - " 12\n", + " 4640.0\n", + " 1166.7\n", + " 5112.0\n", + " 38289.0\n", + " 34344.0\n", + " 4919.0\n", + " 713.7\n", + " ...\n", + " 35.166667\n", + " 144\n", " 1\n", - " 5\n", - " 2013\n", - " 2\n", " 1\n", " 2\n", + " 2\n", + " 2013\n", " 1\n", + " 12\n", + " 5\n", " \n", " \n", " T001\n", - " 144\n", - " 827.0\n", - " 794.0\n", - " 839.0\n", - " 848.0\n", - " 
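The entityset printed above gains its readings.turbine_id -> turbines.turbine_id relationship from the second grouped block. A minimal sketch of those two featuretools steps, again assuming the pre-1.0 API and invented values; `make_index` is needed because the unstacked readings have no natural index:

```python
import pandas as pd
import featuretools as ft

unstacked = pd.DataFrame({
    'turbine_id': ['T001', 'T001'],
    'timestamp': pd.to_datetime(['2013-01-10 00:00', '2013-01-10 00:10']),
    'value_S01': [323.0, 346.0],
    'value_S02': [320.0, 384.0],
})

es = ft.EntitySet('entityset')
# entity_from_dataframe: register the unstacked readings, creating a
# surrogate index since the unstacked frame has no natural one
es = es.entity_from_dataframe('readings', unstacked, make_index=True,
                              index='reading_id', time_index='timestamp')
# normalize_entity: split turbine_id into its own entity, which creates the
# readings.turbine_id -> turbines.turbine_id relationship shown above
es = es.normalize_entity('readings', 'turbines', 'turbine_id')
```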
843.0\n", - " 843.0\n", - " 844.0\n", - " 3223315.0\n", - " 719405.0\n", - " ...\n", + " 466806830.0\n", + " 498019072.0\n", " 9008.0\n", - " 63463.0\n", - " 13\n", + " 9179.0\n", + " 1581.7\n", + " 9134.0\n", + " 86707.0\n", + " 78749.0\n", + " 9863.0\n", + " 916.3\n", + " ...\n", + " 53.381944\n", + " 144\n", " 1\n", - " 6\n", - " 2013\n", - " 2\n", " 1\n", " 2\n", + " 2\n", + " 2013\n", " 1\n", + " 13\n", + " 6\n", " \n", " \n", " T001\n", - " 144\n", - " 848.0\n", - " 841.0\n", - " 838.0\n", - " 849.0\n", - " 850.0\n", - " 848.0\n", - " 850.0\n", - " 3233989.0\n", - " 728250.0\n", - " ...\n", + " 468250434.0\n", + " 499530451.0\n", " 10073.0\n", - " 70393.0\n", - " 14\n", + " 10310.0\n", + " 1690.9\n", + " 10674.0\n", + " 87907.0\n", + " 83264.0\n", + " 10638.0\n", + " 970.6\n", + " ...\n", + " 61.423611\n", + " 144\n", " 1\n", - " 0\n", - " 2013\n", - " 2\n", " 1\n", " 2\n", + " 2\n", + " 2013\n", " 1\n", + " 14\n", + " 0\n", " \n", " \n", " T001\n", - " 144\n", - " 825.0\n", - " 840.0\n", - " 840.0\n", - " 844.0\n", - " 844.0\n", - " 830.0\n", - " 839.0\n", - " 3242820.0\n", - " 738155.0\n", - " ...\n", + " 371675934.0\n", + " 400196323.0\n", " 7381.0\n", - " 59954.0\n", - " 15\n", + " 8228.0\n", + " 1666.0\n", + " 8831.0\n", + " 68811.0\n", + " 64088.0\n", + " 8629.0\n", + " 948.8\n", + " ...\n", + " 87.575221\n", + " 144\n", " 1\n", " 1\n", - " 2013\n", " 2\n", - " 1\n", " 2\n", + " 2013\n", + " 1\n", + " 15\n", " 1\n", " \n", " \n", @@ -1017,69 +851,69 @@ "" ], "text/plain": [ - " COUNT(readings) MAX(readings.value_S01) MAX(readings.value_S02) \\\n", - "turbine_id \n", - "T001 144 369.0 376.0 \n", - "T001 144 505.0 426.0 \n", - "T001 144 827.0 794.0 \n", - "T001 144 848.0 841.0 \n", - "T001 144 825.0 840.0 \n", + " SUM(readings.value_S14) SUM(readings.value_S11) \\\n", + "turbine_id \n", + "T001 465421817.0 496362516.0 \n", + "T001 465897578.0 496952628.0 \n", + "T001 466806830.0 498019072.0 \n", + "T001 468250434.0 499530451.0 \n", + "T001 371675934.0 400196323.0 \n", "\n", - " MAX(readings.value_S03) MAX(readings.value_S04) \\\n", + " SUM(readings.value_S25) SUM(readings.value_S23) \\\n", "turbine_id \n", - "T001 378.0 401.0 \n", - "T001 393.0 517.0 \n", - "T001 839.0 848.0 \n", - "T001 838.0 849.0 \n", - "T001 840.0 844.0 \n", + "T001 2743.0 2780.0 \n", + "T001 4237.0 4640.0 \n", + "T001 9008.0 9179.0 \n", + "T001 10073.0 10310.0 \n", + "T001 7381.0 8228.0 \n", "\n", - " MAX(readings.value_S05) MAX(readings.value_S06) \\\n", + " SUM(readings.value_S17) SUM(readings.value_S19) \\\n", "turbine_id \n", - "T001 317.0 324.0 \n", - "T001 469.0 407.0 \n", - "T001 843.0 843.0 \n", - "T001 850.0 848.0 \n", - "T001 844.0 830.0 \n", + "T001 994.6 3174.0 \n", + "T001 1166.7 5112.0 \n", + "T001 1581.7 9134.0 \n", + "T001 1690.9 10674.0 \n", + "T001 1666.0 8831.0 \n", "\n", - " MAX(readings.value_S07) MAX(readings.value_S08) \\\n", + " SUM(readings.value_S04) SUM(readings.value_S05) \\\n", "turbine_id \n", - "T001 301.0 3209069.0 \n", - "T001 459.0 3214181.0 \n", - "T001 844.0 3223315.0 \n", - "T001 850.0 3233989.0 \n", - "T001 839.0 3242820.0 \n", + "T001 19412.0 17083.0 \n", + "T001 38289.0 34344.0 \n", + "T001 86707.0 78749.0 \n", + "T001 87907.0 83264.0 \n", + "T001 68811.0 64088.0 \n", "\n", - " MAX(readings.value_S09) ... SUM(readings.value_S25) \\\n", - "turbine_id ... \n", - "T001 706654.0 ... 2743.0 \n", - "T001 711718.0 ... 4237.0 \n", - "T001 719405.0 ... 9008.0 \n", - "T001 728250.0 ... 10073.0 \n", - "T001 738155.0 ... 
7381.0 \n", + " SUM(readings.value_S21) SUM(readings.value_S16) ... \\\n", + "turbine_id ... \n", + "T001 3061.0 550.4 ... \n", + "T001 4919.0 713.7 ... \n", + "T001 9863.0 916.3 ... \n", + "T001 10638.0 970.6 ... \n", + "T001 8629.0 948.8 ... \n", "\n", - " SUM(readings.value_S26) MODE(readings.DAY(timestamp)) \\\n", - "turbine_id \n", - "T001 20569.0 11 \n", - "T001 32991.0 12 \n", - "T001 63463.0 13 \n", - "T001 70393.0 14 \n", - "T001 59954.0 15 \n", + " MEAN(readings.value_S20) COUNT(readings) \\\n", + "turbine_id \n", + "T001 22.326389 144 \n", + "T001 35.166667 144 \n", + "T001 53.381944 144 \n", + "T001 61.423611 144 \n", + "T001 87.575221 144 \n", "\n", - " MODE(readings.MONTH(timestamp)) \\\n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", + " NUM_UNIQUE(readings.YEAR(timestamp)) \\\n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", "\n", - " MODE(readings.WEEKDAY(timestamp)) MODE(readings.YEAR(timestamp)) \\\n", - "turbine_id \n", - "T001 4 2013 \n", - "T001 5 2013 \n", - "T001 6 2013 \n", - "T001 0 2013 \n", - "T001 1 2013 \n", + " NUM_UNIQUE(readings.MONTH(timestamp)) \\\n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", "\n", " NUM_UNIQUE(readings.DAY(timestamp)) \\\n", "turbine_id \n", @@ -1089,14 +923,6 @@ "T001 2 \n", "T001 2 \n", "\n", - " NUM_UNIQUE(readings.MONTH(timestamp)) \\\n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "\n", " NUM_UNIQUE(readings.WEEKDAY(timestamp)) \\\n", "turbine_id \n", "T001 2 \n", @@ -1105,18 +931,26 @@ "T001 2 \n", "T001 2 \n", "\n", - " NUM_UNIQUE(readings.YEAR(timestamp)) \n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", + " MODE(readings.YEAR(timestamp)) MODE(readings.MONTH(timestamp)) \\\n", + "turbine_id \n", + "T001 2013 1 \n", + "T001 2013 1 \n", + "T001 2013 1 \n", + "T001 2013 1 \n", + "T001 2013 1 \n", + "\n", + " MODE(readings.DAY(timestamp)) MODE(readings.WEEKDAY(timestamp)) \n", + "turbine_id \n", + "T001 11 4 \n", + "T001 12 5 \n", + "T001 13 6 \n", + "T001 14 0 \n", + "T001 15 1 \n", "\n", "[5 rows x 165 columns]" ] }, - "execution_count": 21, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -1127,7 +961,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -1136,7 +970,7 @@ "165" ] }, - "execution_count": 22, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1159,17 +993,17 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ - "step = 5\n", + "step = 3\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -1178,7 +1012,7 @@ "dict_keys(['readings', 'turbines', 'entityset', 'X', 'y'])" ] }, - "execution_count": 24, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -1189,7 +1023,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 19, "metadata": {}, "outputs": [ { @@ -1213,27 +1047,27 @@ " \n", " \n", " \n", - " COUNT(readings)\n", - " MAX(readings.value_S01)\n", - " MAX(readings.value_S02)\n", - " MAX(readings.value_S03)\n", - " MAX(readings.value_S04)\n", - " MAX(readings.value_S05)\n", - " MAX(readings.value_S06)\n", - " 
MAX(readings.value_S07)\n", - " MAX(readings.value_S08)\n", - " MAX(readings.value_S09)\n", - " ...\n", + " SUM(readings.value_S14)\n", + " SUM(readings.value_S11)\n", " SUM(readings.value_S25)\n", - " SUM(readings.value_S26)\n", - " MODE(readings.DAY(timestamp))\n", - " MODE(readings.MONTH(timestamp))\n", - " MODE(readings.WEEKDAY(timestamp))\n", - " MODE(readings.YEAR(timestamp))\n", - " NUM_UNIQUE(readings.DAY(timestamp))\n", + " SUM(readings.value_S23)\n", + " SUM(readings.value_S17)\n", + " SUM(readings.value_S19)\n", + " SUM(readings.value_S04)\n", + " SUM(readings.value_S05)\n", + " SUM(readings.value_S21)\n", + " SUM(readings.value_S16)\n", + " ...\n", + " MEAN(readings.value_S20)\n", + " COUNT(readings)\n", + " NUM_UNIQUE(readings.YEAR(timestamp))\n", " NUM_UNIQUE(readings.MONTH(timestamp))\n", + " NUM_UNIQUE(readings.DAY(timestamp))\n", " NUM_UNIQUE(readings.WEEKDAY(timestamp))\n", - " NUM_UNIQUE(readings.YEAR(timestamp))\n", + " MODE(readings.YEAR(timestamp))\n", + " MODE(readings.MONTH(timestamp))\n", + " MODE(readings.DAY(timestamp))\n", + " MODE(readings.WEEKDAY(timestamp))\n", " \n", " \n", " turbine_id\n", @@ -1263,122 +1097,122 @@ " \n", " \n", " T001\n", - " 144\n", - " 369.0\n", - " 376.0\n", - " 378.0\n", - " 401.0\n", - " 317.0\n", - " 324.0\n", - " 301.0\n", - " 3209069.0\n", - " 706654.0\n", - " ...\n", + " 465421817.0\n", + " 496362516.0\n", " 2743.0\n", - " 20569.0\n", - " 11\n", + " 2780.0\n", + " 994.6\n", + " 3174.0\n", + " 19412.0\n", + " 17083.0\n", + " 3061.0\n", + " 550.4\n", + " ...\n", + " 22.326389\n", + " 144\n", " 1\n", - " 4\n", - " 2013\n", - " 2\n", " 1\n", " 2\n", + " 2\n", + " 2013\n", " 1\n", + " 11\n", + " 4\n", " \n", " \n", " T001\n", - " 144\n", - " 505.0\n", - " 426.0\n", - " 393.0\n", - " 517.0\n", - " 469.0\n", - " 407.0\n", - " 459.0\n", - " 3214181.0\n", - " 711718.0\n", - " ...\n", + " 465897578.0\n", + " 496952628.0\n", " 4237.0\n", - " 32991.0\n", - " 12\n", + " 4640.0\n", + " 1166.7\n", + " 5112.0\n", + " 38289.0\n", + " 34344.0\n", + " 4919.0\n", + " 713.7\n", + " ...\n", + " 35.166667\n", + " 144\n", " 1\n", - " 5\n", - " 2013\n", - " 2\n", " 1\n", " 2\n", + " 2\n", + " 2013\n", " 1\n", + " 12\n", + " 5\n", " \n", " \n", " T001\n", - " 144\n", - " 827.0\n", - " 794.0\n", - " 839.0\n", - " 848.0\n", - " 843.0\n", - " 843.0\n", - " 844.0\n", - " 3223315.0\n", - " 719405.0\n", - " ...\n", + " 466806830.0\n", + " 498019072.0\n", " 9008.0\n", - " 63463.0\n", - " 13\n", + " 9179.0\n", + " 1581.7\n", + " 9134.0\n", + " 86707.0\n", + " 78749.0\n", + " 9863.0\n", + " 916.3\n", + " ...\n", + " 53.381944\n", + " 144\n", " 1\n", - " 6\n", - " 2013\n", - " 2\n", " 1\n", " 2\n", + " 2\n", + " 2013\n", " 1\n", + " 13\n", + " 6\n", " \n", " \n", " T001\n", - " 144\n", - " 848.0\n", - " 841.0\n", - " 838.0\n", - " 849.0\n", - " 850.0\n", - " 848.0\n", - " 850.0\n", - " 3233989.0\n", - " 728250.0\n", - " ...\n", + " 468250434.0\n", + " 499530451.0\n", " 10073.0\n", - " 70393.0\n", - " 14\n", + " 10310.0\n", + " 1690.9\n", + " 10674.0\n", + " 87907.0\n", + " 83264.0\n", + " 10638.0\n", + " 970.6\n", + " ...\n", + " 61.423611\n", + " 144\n", " 1\n", - " 0\n", - " 2013\n", - " 2\n", " 1\n", " 2\n", + " 2\n", + " 2013\n", " 1\n", + " 14\n", + " 0\n", " \n", " \n", " T001\n", - " 144\n", - " 825.0\n", - " 840.0\n", - " 840.0\n", - " 844.0\n", - " 844.0\n", - " 830.0\n", - " 839.0\n", - " 3242820.0\n", - " 738155.0\n", - " ...\n", + " 371675934.0\n", + " 400196323.0\n", " 7381.0\n", - " 59954.0\n", - " 15\n", + " 8228.0\n", + " 1666.0\n", + " 8831.0\n", + 
" 68811.0\n", + " 64088.0\n", + " 8629.0\n", + " 948.8\n", + " ...\n", + " 87.575221\n", + " 144\n", " 1\n", " 1\n", - " 2013\n", " 2\n", - " 1\n", " 2\n", + " 2013\n", + " 1\n", + " 15\n", " 1\n", " \n", " \n", @@ -1387,69 +1221,69 @@ "" ], "text/plain": [ - " COUNT(readings) MAX(readings.value_S01) MAX(readings.value_S02) \\\n", - "turbine_id \n", - "T001 144 369.0 376.0 \n", - "T001 144 505.0 426.0 \n", - "T001 144 827.0 794.0 \n", - "T001 144 848.0 841.0 \n", - "T001 144 825.0 840.0 \n", + " SUM(readings.value_S14) SUM(readings.value_S11) \\\n", + "turbine_id \n", + "T001 465421817.0 496362516.0 \n", + "T001 465897578.0 496952628.0 \n", + "T001 466806830.0 498019072.0 \n", + "T001 468250434.0 499530451.0 \n", + "T001 371675934.0 400196323.0 \n", "\n", - " MAX(readings.value_S03) MAX(readings.value_S04) \\\n", + " SUM(readings.value_S25) SUM(readings.value_S23) \\\n", "turbine_id \n", - "T001 378.0 401.0 \n", - "T001 393.0 517.0 \n", - "T001 839.0 848.0 \n", - "T001 838.0 849.0 \n", - "T001 840.0 844.0 \n", + "T001 2743.0 2780.0 \n", + "T001 4237.0 4640.0 \n", + "T001 9008.0 9179.0 \n", + "T001 10073.0 10310.0 \n", + "T001 7381.0 8228.0 \n", "\n", - " MAX(readings.value_S05) MAX(readings.value_S06) \\\n", + " SUM(readings.value_S17) SUM(readings.value_S19) \\\n", "turbine_id \n", - "T001 317.0 324.0 \n", - "T001 469.0 407.0 \n", - "T001 843.0 843.0 \n", - "T001 850.0 848.0 \n", - "T001 844.0 830.0 \n", + "T001 994.6 3174.0 \n", + "T001 1166.7 5112.0 \n", + "T001 1581.7 9134.0 \n", + "T001 1690.9 10674.0 \n", + "T001 1666.0 8831.0 \n", "\n", - " MAX(readings.value_S07) MAX(readings.value_S08) \\\n", + " SUM(readings.value_S04) SUM(readings.value_S05) \\\n", "turbine_id \n", - "T001 301.0 3209069.0 \n", - "T001 459.0 3214181.0 \n", - "T001 844.0 3223315.0 \n", - "T001 850.0 3233989.0 \n", - "T001 839.0 3242820.0 \n", + "T001 19412.0 17083.0 \n", + "T001 38289.0 34344.0 \n", + "T001 86707.0 78749.0 \n", + "T001 87907.0 83264.0 \n", + "T001 68811.0 64088.0 \n", "\n", - " MAX(readings.value_S09) ... SUM(readings.value_S25) \\\n", - "turbine_id ... \n", - "T001 706654.0 ... 2743.0 \n", - "T001 711718.0 ... 4237.0 \n", - "T001 719405.0 ... 9008.0 \n", - "T001 728250.0 ... 10073.0 \n", - "T001 738155.0 ... 7381.0 \n", + " SUM(readings.value_S21) SUM(readings.value_S16) ... \\\n", + "turbine_id ... \n", + "T001 3061.0 550.4 ... \n", + "T001 4919.0 713.7 ... \n", + "T001 9863.0 916.3 ... \n", + "T001 10638.0 970.6 ... \n", + "T001 8629.0 948.8 ... 
\n", "\n", - " SUM(readings.value_S26) MODE(readings.DAY(timestamp)) \\\n", - "turbine_id \n", - "T001 20569.0 11 \n", - "T001 32991.0 12 \n", - "T001 63463.0 13 \n", - "T001 70393.0 14 \n", - "T001 59954.0 15 \n", + " MEAN(readings.value_S20) COUNT(readings) \\\n", + "turbine_id \n", + "T001 22.326389 144 \n", + "T001 35.166667 144 \n", + "T001 53.381944 144 \n", + "T001 61.423611 144 \n", + "T001 87.575221 144 \n", "\n", - " MODE(readings.MONTH(timestamp)) \\\n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", + " NUM_UNIQUE(readings.YEAR(timestamp)) \\\n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", "\n", - " MODE(readings.WEEKDAY(timestamp)) MODE(readings.YEAR(timestamp)) \\\n", - "turbine_id \n", - "T001 4 2013 \n", - "T001 5 2013 \n", - "T001 6 2013 \n", - "T001 0 2013 \n", - "T001 1 2013 \n", + " NUM_UNIQUE(readings.MONTH(timestamp)) \\\n", + "turbine_id \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", + "T001 1 \n", "\n", " NUM_UNIQUE(readings.DAY(timestamp)) \\\n", "turbine_id \n", @@ -1459,14 +1293,6 @@ "T001 2 \n", "T001 2 \n", "\n", - " NUM_UNIQUE(readings.MONTH(timestamp)) \\\n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "\n", " NUM_UNIQUE(readings.WEEKDAY(timestamp)) \\\n", "turbine_id \n", "T001 2 \n", @@ -1475,18 +1301,26 @@ "T001 2 \n", "T001 2 \n", "\n", - " NUM_UNIQUE(readings.YEAR(timestamp)) \n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", + " MODE(readings.YEAR(timestamp)) MODE(readings.MONTH(timestamp)) \\\n", + "turbine_id \n", + "T001 2013 1 \n", + "T001 2013 1 \n", + "T001 2013 1 \n", + "T001 2013 1 \n", + "T001 2013 1 \n", + "\n", + " MODE(readings.DAY(timestamp)) MODE(readings.WEEKDAY(timestamp)) \n", + "turbine_id \n", + "T001 11 4 \n", + "T001 12 5 \n", + "T001 13 6 \n", + "T001 14 0 \n", + "T001 15 1 \n", "\n", "[5 rows x 165 columns]" ] }, - "execution_count": 25, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -1497,7 +1331,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -1702,7 +1536,7 @@ "[5 rows x 28 columns]" ] }, - "execution_count": 26, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -1724,17 +1558,17 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ - "step = 6\n", + "step = 4\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -1743,7 +1577,7 @@ "dict_keys(['readings', 'turbines', 'entityset', 'X', 'y'])" ] }, - "execution_count": 28, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -1755,7 +1589,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -1769,7 +1603,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.6.12" + "version": "3.7.11" } }, "nbformat": 4, diff --git a/tutorials/pipelines/double_lstm_with_unstack.ipynb b/tutorials/pipelines/double_lstm_with_unstack.ipynb new file mode 100644 index 0000000..4bc7d0f --- /dev/null +++ b/tutorials/pipelines/double_lstm_with_unstack.ipynb @@ -0,0 +1,2375 @@ +{ + "cells": [ + { + "cell_type": 
"markdown", + "metadata": {}, + "source": [ + "# double_lstm_with_unstack" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from draco.demo import load_demo\n", + "\n", + "target_times, readings = load_demo()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_name = 'double_lstm_with_unstack'" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from draco.pipeline import DracoPipeline\n", + "\n", + "pipeline = DracoPipeline(pipeline_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['mlblocks.MLPipeline',\n", + " 'pandas.DataFrame.pop',\n", + " 'pandas.DataFrame.pop',\n", + " 'sklearn.impute.SimpleImputer',\n", + " 'sklearn.preprocessing.MinMaxScaler',\n", + " 'pandas.DataFrame',\n", + " 'pandas.DataFrame.set',\n", + " 'pandas.DataFrame.set',\n", + " 'mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences',\n", + " 'keras.Sequential.DoubleLSTMTimeSeriesClassifier']" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipeline.template['primitives']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Step by Step execution" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Input Data" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idsignal_idtimestampvalue
0T001S012013-01-10323.0
1T001S022013-01-10320.0
2T001S032013-01-10284.0
3T001S042013-01-10348.0
4T001S052013-01-10273.0
\n", + "
" + ], + "text/plain": [ + " turbine_id signal_id timestamp value\n", + "0 T001 S01 2013-01-10 323.0\n", + "1 T001 S02 2013-01-10 320.0\n", + "2 T001 S03 2013-01-10 284.0\n", + "3 T001 S04 2013-01-10 348.0\n", + "4 T001 S05 2013-01-10 273.0" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "readings.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idcutoff_timetarget
0T0012013-01-120
1T0012013-01-130
2T0012013-01-140
3T0012013-01-151
4T0012013-01-160
\n", + "
" + ], + "text/plain": [ + " turbine_id cutoff_time target\n", + "0 T001 2013-01-12 0\n", + "1 T001 2013-01-13 0\n", + "2 T001 2013-01-14 0\n", + "3 T001 2013-01-15 1\n", + "4 T001 2013-01-16 0" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "target_times.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data Preparation (part of Draco Pipeline)\n", + "\n", + "* Input: target_times, readings, turbines\n", + "* Output: X, y, readings, turbines\n", + "* Effect: target_times has been split into X and y" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## mlblocks.MLPipeline\n", + "\n", + "### pandas.DataFrame.resample\n", + "\n", + "* Input: readings\n", + "* Output: readings (resampled)\n", + "* Effect: readings have been resampled to the indicated resample rule and turbine_id,\n", + " signal_id and timestamp have been set as a multi-index\n", + " \n", + "### pandas.DataFrame.unstack\n", + "\n", + "* Input: readings (resampled)\n", + "* Output: readings (unstacked)\n", + "* Effect: readings have been unstacked" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "context = pipeline.fit(target_times, readings, output_=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'X', 'y'])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idtimestampvalue_S01value_S02value_S03value_S04value_S05value_S06value_S07value_S08...value_S17value_S18value_S19value_S20value_S21value_S22value_S23value_S24value_S25value_S26
0T0012013-01-10 00:00:00323.0320.0284.0348.0273.0342.0280.03197842.0...11.73131020.055.055.047.058.045.058.047.0356.0
1T0012013-01-10 00:10:00346.0384.0367.0411.0331.0360.0249.03197900.0...10.23131420.058.063.062.067.055.061.042.0400.0
2T0012013-01-10 00:20:00407.0363.0407.0393.0275.0335.0270.03197968.0...9.53131822.068.061.067.066.046.055.045.0402.0
3T0012013-01-10 00:30:00257.0307.0315.0361.0317.0354.0271.03198011.0...10.53132179.043.051.053.062.053.060.045.0357.0
4T0012013-01-10 00:40:00267.0309.0314.0355.0262.0246.0212.03198056.0...9.63132501.045.051.054.059.043.041.036.0322.0
\n", + "

5 rows × 28 columns

\n", + "
" + ], + "text/plain": [ + " turbine_id timestamp value_S01 value_S02 value_S03 value_S04 \\\n", + "0 T001 2013-01-10 00:00:00 323.0 320.0 284.0 348.0 \n", + "1 T001 2013-01-10 00:10:00 346.0 384.0 367.0 411.0 \n", + "2 T001 2013-01-10 00:20:00 407.0 363.0 407.0 393.0 \n", + "3 T001 2013-01-10 00:30:00 257.0 307.0 315.0 361.0 \n", + "4 T001 2013-01-10 00:40:00 267.0 309.0 314.0 355.0 \n", + "\n", + " value_S05 value_S06 value_S07 value_S08 ... value_S17 value_S18 \\\n", + "0 273.0 342.0 280.0 3197842.0 ... 11.7 3131020.0 \n", + "1 331.0 360.0 249.0 3197900.0 ... 10.2 3131420.0 \n", + "2 275.0 335.0 270.0 3197968.0 ... 9.5 3131822.0 \n", + "3 317.0 354.0 271.0 3198011.0 ... 10.5 3132179.0 \n", + "4 262.0 246.0 212.0 3198056.0 ... 9.6 3132501.0 \n", + "\n", + " value_S19 value_S20 value_S21 value_S22 value_S23 value_S24 \\\n", + "0 55.0 55.0 47.0 58.0 45.0 58.0 \n", + "1 58.0 63.0 62.0 67.0 55.0 61.0 \n", + "2 68.0 61.0 67.0 66.0 46.0 55.0 \n", + "3 43.0 51.0 53.0 62.0 53.0 60.0 \n", + "4 45.0 51.0 54.0 59.0 43.0 41.0 \n", + "\n", + " value_S25 value_S26 \n", + "0 47.0 356.0 \n", + "1 42.0 400.0 \n", + "2 45.0 402.0 \n", + "3 45.0 357.0 \n", + "4 36.0 322.0 \n", + "\n", + "[5 rows x 28 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## pandas.DataFrame.pop\n", + "\n", + "* Input: readings (unstacked)\n", + "* Output: readings (without turbine_id), turbine_id\n", + "* Effect: turbine_id has been popped from readings" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "step = 1\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'X', 'y', 'turbine_id'])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 T001\n", + "1 T001\n", + "2 T001\n", + "3 T001\n", + "4 T001\n", + "Name: turbine_id, dtype: object" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['turbine_id'].head()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timestampvalue_S01value_S02value_S03value_S04value_S05value_S06value_S07value_S08value_S09...value_S17value_S18value_S19value_S20value_S21value_S22value_S23value_S24value_S25value_S26
02013-01-10 00:00:00323.0320.0284.0348.0273.0342.0280.03197842.0695000.0...11.73131020.055.055.047.058.045.058.047.0356.0
12013-01-10 00:10:00346.0384.0367.0411.0331.0360.0249.03197900.0695063.0...10.23131420.058.063.062.067.055.061.042.0400.0
22013-01-10 00:20:00407.0363.0407.0393.0275.0335.0270.03197968.0695124.0...9.53131822.068.061.067.066.046.055.045.0402.0
32013-01-10 00:30:00257.0307.0315.0361.0317.0354.0271.03198011.0695175.0...10.53132179.043.051.053.062.053.060.045.0357.0
42013-01-10 00:40:00267.0309.0314.0355.0262.0246.0212.03198056.0695226.0...9.63132501.045.051.054.059.043.041.036.0322.0
\n", + "

5 rows × 27 columns

\n", + "
" + ], + "text/plain": [ + " timestamp value_S01 value_S02 value_S03 value_S04 value_S05 \\\n", + "0 2013-01-10 00:00:00 323.0 320.0 284.0 348.0 273.0 \n", + "1 2013-01-10 00:10:00 346.0 384.0 367.0 411.0 331.0 \n", + "2 2013-01-10 00:20:00 407.0 363.0 407.0 393.0 275.0 \n", + "3 2013-01-10 00:30:00 257.0 307.0 315.0 361.0 317.0 \n", + "4 2013-01-10 00:40:00 267.0 309.0 314.0 355.0 262.0 \n", + "\n", + " value_S06 value_S07 value_S08 value_S09 ... value_S17 value_S18 \\\n", + "0 342.0 280.0 3197842.0 695000.0 ... 11.7 3131020.0 \n", + "1 360.0 249.0 3197900.0 695063.0 ... 10.2 3131420.0 \n", + "2 335.0 270.0 3197968.0 695124.0 ... 9.5 3131822.0 \n", + "3 354.0 271.0 3198011.0 695175.0 ... 10.5 3132179.0 \n", + "4 246.0 212.0 3198056.0 695226.0 ... 9.6 3132501.0 \n", + "\n", + " value_S19 value_S20 value_S21 value_S22 value_S23 value_S24 \\\n", + "0 55.0 55.0 47.0 58.0 45.0 58.0 \n", + "1 58.0 63.0 62.0 67.0 55.0 61.0 \n", + "2 68.0 61.0 67.0 66.0 46.0 55.0 \n", + "3 43.0 51.0 53.0 62.0 53.0 60.0 \n", + "4 45.0 51.0 54.0 59.0 43.0 41.0 \n", + "\n", + " value_S25 value_S26 \n", + "0 47.0 356.0 \n", + "1 42.0 400.0 \n", + "2 45.0 402.0 \n", + "3 45.0 357.0 \n", + "4 36.0 322.0 \n", + "\n", + "[5 rows x 27 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## pandas.DataFrame.pop\n", + "\n", + "* Input: readings (without turbine_id)\n", + "* Output: readings (without timestamp), timestamp\n", + "* Effect: timestamp has been popped from readings" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "step = 2\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'X', 'y', 'timestamp'])" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2013-01-10 00:00:00\n", + "1 2013-01-10 00:10:00\n", + "2 2013-01-10 00:20:00\n", + "3 2013-01-10 00:30:00\n", + "4 2013-01-10 00:40:00\n", + "Name: timestamp, dtype: datetime64[ns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['timestamp'].head()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
value_S01value_S02value_S03value_S04value_S05value_S06value_S07value_S08value_S09value_S10...value_S17value_S18value_S19value_S20value_S21value_S22value_S23value_S24value_S25value_S26
0323.0320.0284.0348.0273.0342.0280.03197842.0695000.03348234.0...11.73131020.055.055.047.058.045.058.047.0356.0
1346.0384.0367.0411.0331.0360.0249.03197900.0695063.03348296.0...10.23131420.058.063.062.067.055.061.042.0400.0
2407.0363.0407.0393.0275.0335.0270.03197968.0695124.03348363.0...9.53131822.068.061.067.066.046.055.045.0402.0
3257.0307.0315.0361.0317.0354.0271.03198011.0695175.03348416.0...10.53132179.043.051.053.062.053.060.045.0357.0
4267.0309.0314.0355.0262.0246.0212.03198056.0695226.03348470.0...9.63132501.045.051.054.059.043.041.036.0322.0
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " value_S01 value_S02 value_S03 value_S04 value_S05 value_S06 \\\n", + "0 323.0 320.0 284.0 348.0 273.0 342.0 \n", + "1 346.0 384.0 367.0 411.0 331.0 360.0 \n", + "2 407.0 363.0 407.0 393.0 275.0 335.0 \n", + "3 257.0 307.0 315.0 361.0 317.0 354.0 \n", + "4 267.0 309.0 314.0 355.0 262.0 246.0 \n", + "\n", + " value_S07 value_S08 value_S09 value_S10 ... value_S17 value_S18 \\\n", + "0 280.0 3197842.0 695000.0 3348234.0 ... 11.7 3131020.0 \n", + "1 249.0 3197900.0 695063.0 3348296.0 ... 10.2 3131420.0 \n", + "2 270.0 3197968.0 695124.0 3348363.0 ... 9.5 3131822.0 \n", + "3 271.0 3198011.0 695175.0 3348416.0 ... 10.5 3132179.0 \n", + "4 212.0 3198056.0 695226.0 3348470.0 ... 9.6 3132501.0 \n", + "\n", + " value_S19 value_S20 value_S21 value_S22 value_S23 value_S24 \\\n", + "0 55.0 55.0 47.0 58.0 45.0 58.0 \n", + "1 58.0 63.0 62.0 67.0 55.0 61.0 \n", + "2 68.0 61.0 67.0 66.0 46.0 55.0 \n", + "3 43.0 51.0 53.0 62.0 53.0 60.0 \n", + "4 45.0 51.0 54.0 59.0 43.0 41.0 \n", + "\n", + " value_S25 value_S26 \n", + "0 47.0 356.0 \n", + "1 42.0 400.0 \n", + "2 45.0 402.0 \n", + "3 45.0 357.0 \n", + "4 36.0 322.0 \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## sklearn.impute.SimpleImputer\n", + "\n", + "* Input: readings (unstacked, no turbine_id, no timestamp)\n", + "* Output: readings (imputed, numpy array)\n", + "* Effect: readings have been imputed and converted to numpy array" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "step = 3\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[3.230000e+02, 3.200000e+02, 2.840000e+02, 3.480000e+02,\n", + " 2.730000e+02, 3.420000e+02, 2.800000e+02, 3.197842e+06,\n", + " 6.950000e+05, 3.348234e+06, 3.436762e+06, 3.322362e+06,\n", + " 3.357952e+06, 3.223797e+06, 8.300000e+00, 6.000000e+00,\n", + " 1.170000e+01, 3.131020e+06, 5.500000e+01, 5.500000e+01,\n", + " 4.700000e+01, 5.800000e+01, 4.500000e+01, 5.800000e+01,\n", + " 4.700000e+01, 3.560000e+02],\n", + " [3.460000e+02, 3.840000e+02, 3.670000e+02, 4.110000e+02,\n", + " 3.310000e+02, 3.600000e+02, 2.490000e+02, 3.197900e+06,\n", + " 6.950630e+05, 3.348296e+06, 3.436829e+06, 3.322417e+06,\n", + " 3.358013e+06, 3.223839e+06, 7.600000e+00, 5.000000e+00,\n", + " 1.020000e+01, 3.131420e+06, 5.800000e+01, 6.300000e+01,\n", + " 6.200000e+01, 6.700000e+01, 5.500000e+01, 6.100000e+01,\n", + " 4.200000e+01, 4.000000e+02],\n", + " [4.070000e+02, 3.630000e+02, 4.070000e+02, 3.930000e+02,\n", + " 2.750000e+02, 3.350000e+02, 2.700000e+02, 3.197968e+06,\n", + " 6.951240e+05, 3.348363e+06, 3.436895e+06, 3.322463e+06,\n", + " 3.358068e+06, 3.223884e+06, 7.800000e+00, 5.700000e+00,\n", + " 9.500000e+00, 3.131822e+06, 6.800000e+01, 6.100000e+01,\n", + " 6.700000e+01, 6.600000e+01, 4.600000e+01, 5.500000e+01,\n", + " 4.500000e+01, 4.020000e+02],\n", + " 
[2.570000e+02, 3.070000e+02, 3.150000e+02, 3.610000e+02,\n", + " 3.170000e+02, 3.540000e+02, 2.710000e+02, 3.198011e+06,\n", + " 6.951750e+05, 3.348416e+06, 3.436957e+06, 3.322516e+06,\n", + " 3.358128e+06, 3.223929e+06, 8.600000e+00, 6.600000e+00,\n", + " 1.050000e+01, 3.132179e+06, 4.300000e+01, 5.100000e+01,\n", + " 5.300000e+01, 6.200000e+01, 5.300000e+01, 6.000000e+01,\n", + " 4.500000e+01, 3.570000e+02],\n", + " [2.670000e+02, 3.090000e+02, 3.140000e+02, 3.550000e+02,\n", + " 2.620000e+02, 2.460000e+02, 2.120000e+02, 3.198056e+06,\n", + " 6.952260e+05, 3.348470e+06, 3.437016e+06, 3.322559e+06,\n", + " 3.358169e+06, 3.223965e+06, 7.500000e+00, 5.900000e+00,\n", + " 9.600000e+00, 3.132501e+06, 4.500000e+01, 5.100000e+01,\n", + " 5.400000e+01, 5.900000e+01, 4.300000e+01, 4.100000e+01,\n", + " 3.600000e+01, 3.220000e+02]])" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'][0:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## sklearn.preprocessing.MinMaxScaler\n", + "\n", + "* Input: (imputed, array)\n", + "* Output: readings (scaled, array)\n", + "* Effect: readings have been scaled to [-1, 1] range" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "step = 4\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[-0.23563892, -0.24267292, -0.3286385 , -0.17702227, -0.35287222,\n", + " -0.19248826, -0.3317757 , -1. , -1. , -1. ,\n", + " -1. , -1. , -1. , -1. 
, -0.11702128,\n", + " -0.24050633, -0.25714286, -0.37378787, -0.22758621, -0.22758621,\n", + " -0.31972789, -0.1862069 , -0.36986301, -0.1862069 , -0.33793103,\n", + " -0.26141079],\n", + " [-0.18171161, -0.0926143 , -0.13380282, -0.02930832, -0.21688159,\n", + " -0.15023474, -0.40420561, -0.99995911, -0.99995779, -0.99995941,\n", + " -0.99995718, -0.99996326, -0.99996042, -0.99997164, -0.19148936,\n", + " -0.36708861, -0.35238095, -0.37370786, -0.1862069 , -0.11724138,\n", + " -0.11564626, -0.06206897, -0.23287671, -0.14482759, -0.40689655,\n", + " -0.17012448],\n", + " [-0.03868699, -0.14185229, -0.0399061 , -0.07151231, -0.34818288,\n", + " -0.20892019, -0.35514019, -0.99991116, -0.99991693, -0.99991555,\n", + " -0.999915 , -0.99993254, -0.99992474, -0.99994125, -0.17021277,\n", + " -0.27848101, -0.3968254 , -0.37362746, -0.04827586, -0.14482759,\n", + " -0.04761905, -0.07586207, -0.35616438, -0.22758621, -0.36551724,\n", + " -0.1659751 ],\n", + " [-0.39038687, -0.27315358, -0.25586854, -0.14654162, -0.24970692,\n", + " -0.16431925, -0.35280374, -0.99988085, -0.99988276, -0.99988086,\n", + " -0.99987538, -0.99989714, -0.99988581, -0.99991086, -0.08510638,\n", + " -0.16455696, -0.33333333, -0.37355606, -0.39310345, -0.28275862,\n", + " -0.23809524, -0.13103448, -0.26027397, -0.15862069, -0.36551724,\n", + " -0.2593361 ],\n", + " [-0.36694021, -0.26846424, -0.25821596, -0.16060961, -0.37866354,\n", + " -0.41784038, -0.49065421, -0.99984912, -0.99984859, -0.99984551,\n", + " -0.99983767, -0.99986841, -0.99985921, -0.99988655, -0.20212766,\n", + " -0.25316456, -0.39047619, -0.37349166, -0.36551724, -0.28275862,\n", + " -0.2244898 , -0.17241379, -0.39726027, -0.42068966, -0.48965517,\n", + " -0.33195021]])" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'][0:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## pandas.DataFrame\n", + "\n", + "* Input: readings (scaled, array)\n", + "* Output: readings (dataframe)\n", + "* Effect: readings have been converted into a dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "step = 5\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...16171819202122232425
0-0.235639-0.242673-0.328638-0.177022-0.352872-0.192488-0.331776-1.000000-1.000000-1.000000...-0.257143-0.373788-0.227586-0.227586-0.319728-0.186207-0.369863-0.186207-0.337931-0.261411
1-0.181712-0.092614-0.133803-0.029308-0.216882-0.150235-0.404206-0.999959-0.999958-0.999959...-0.352381-0.373708-0.186207-0.117241-0.115646-0.062069-0.232877-0.144828-0.406897-0.170124
2-0.038687-0.141852-0.039906-0.071512-0.348183-0.208920-0.355140-0.999911-0.999917-0.999916...-0.396825-0.373627-0.048276-0.144828-0.047619-0.075862-0.356164-0.227586-0.365517-0.165975
3-0.390387-0.273154-0.255869-0.146542-0.249707-0.164319-0.352804-0.999881-0.999883-0.999881...-0.333333-0.373556-0.393103-0.282759-0.238095-0.131034-0.260274-0.158621-0.365517-0.259336
4-0.366940-0.268464-0.258216-0.160610-0.378664-0.417840-0.490654-0.999849-0.999849-0.999846...-0.390476-0.373492-0.365517-0.282759-0.224490-0.172414-0.397260-0.420690-0.489655-0.331950
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 \\\n", + "0 -0.235639 -0.242673 -0.328638 -0.177022 -0.352872 -0.192488 -0.331776 \n", + "1 -0.181712 -0.092614 -0.133803 -0.029308 -0.216882 -0.150235 -0.404206 \n", + "2 -0.038687 -0.141852 -0.039906 -0.071512 -0.348183 -0.208920 -0.355140 \n", + "3 -0.390387 -0.273154 -0.255869 -0.146542 -0.249707 -0.164319 -0.352804 \n", + "4 -0.366940 -0.268464 -0.258216 -0.160610 -0.378664 -0.417840 -0.490654 \n", + "\n", + " 7 8 9 ... 16 17 18 19 \\\n", + "0 -1.000000 -1.000000 -1.000000 ... -0.257143 -0.373788 -0.227586 -0.227586 \n", + "1 -0.999959 -0.999958 -0.999959 ... -0.352381 -0.373708 -0.186207 -0.117241 \n", + "2 -0.999911 -0.999917 -0.999916 ... -0.396825 -0.373627 -0.048276 -0.144828 \n", + "3 -0.999881 -0.999883 -0.999881 ... -0.333333 -0.373556 -0.393103 -0.282759 \n", + "4 -0.999849 -0.999849 -0.999846 ... -0.390476 -0.373492 -0.365517 -0.282759 \n", + "\n", + " 20 21 22 23 24 25 \n", + "0 -0.319728 -0.186207 -0.369863 -0.186207 -0.337931 -0.261411 \n", + "1 -0.115646 -0.062069 -0.232877 -0.144828 -0.406897 -0.170124 \n", + "2 -0.047619 -0.075862 -0.356164 -0.227586 -0.365517 -0.165975 \n", + "3 -0.238095 -0.131034 -0.260274 -0.158621 -0.365517 -0.259336 \n", + "4 -0.224490 -0.172414 -0.397260 -0.420690 -0.489655 -0.331950 \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## pandas.DataFrame.set\n", + "\n", + "* Input: readings (dataframe)\n", + "* Output: readings (dataframe with turbine_id)\n", + "* Effect: turbine_id has been set as a readings column" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "step = 6\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...171819202122232425turbine_id
0-0.235639-0.242673-0.328638-0.177022-0.352872-0.192488-0.331776-1.000000-1.000000-1.000000...-0.373788-0.227586-0.227586-0.319728-0.186207-0.369863-0.186207-0.337931-0.261411T001
1-0.181712-0.092614-0.133803-0.029308-0.216882-0.150235-0.404206-0.999959-0.999958-0.999959...-0.373708-0.186207-0.117241-0.115646-0.062069-0.232877-0.144828-0.406897-0.170124T001
2-0.038687-0.141852-0.039906-0.071512-0.348183-0.208920-0.355140-0.999911-0.999917-0.999916...-0.373627-0.048276-0.144828-0.047619-0.075862-0.356164-0.227586-0.365517-0.165975T001
3-0.390387-0.273154-0.255869-0.146542-0.249707-0.164319-0.352804-0.999881-0.999883-0.999881...-0.373556-0.393103-0.282759-0.238095-0.131034-0.260274-0.158621-0.365517-0.259336T001
4-0.366940-0.268464-0.258216-0.160610-0.378664-0.417840-0.490654-0.999849-0.999849-0.999846...-0.373492-0.365517-0.282759-0.224490-0.172414-0.397260-0.420690-0.489655-0.331950T001
\n", + "

5 rows × 27 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 \\\n", + "0 -0.235639 -0.242673 -0.328638 -0.177022 -0.352872 -0.192488 -0.331776 \n", + "1 -0.181712 -0.092614 -0.133803 -0.029308 -0.216882 -0.150235 -0.404206 \n", + "2 -0.038687 -0.141852 -0.039906 -0.071512 -0.348183 -0.208920 -0.355140 \n", + "3 -0.390387 -0.273154 -0.255869 -0.146542 -0.249707 -0.164319 -0.352804 \n", + "4 -0.366940 -0.268464 -0.258216 -0.160610 -0.378664 -0.417840 -0.490654 \n", + "\n", + " 7 8 9 ... 17 18 19 20 \\\n", + "0 -1.000000 -1.000000 -1.000000 ... -0.373788 -0.227586 -0.227586 -0.319728 \n", + "1 -0.999959 -0.999958 -0.999959 ... -0.373708 -0.186207 -0.117241 -0.115646 \n", + "2 -0.999911 -0.999917 -0.999916 ... -0.373627 -0.048276 -0.144828 -0.047619 \n", + "3 -0.999881 -0.999883 -0.999881 ... -0.373556 -0.393103 -0.282759 -0.238095 \n", + "4 -0.999849 -0.999849 -0.999846 ... -0.373492 -0.365517 -0.282759 -0.224490 \n", + "\n", + " 21 22 23 24 25 turbine_id \n", + "0 -0.186207 -0.369863 -0.186207 -0.337931 -0.261411 T001 \n", + "1 -0.062069 -0.232877 -0.144828 -0.406897 -0.170124 T001 \n", + "2 -0.075862 -0.356164 -0.227586 -0.365517 -0.165975 T001 \n", + "3 -0.131034 -0.260274 -0.158621 -0.365517 -0.259336 T001 \n", + "4 -0.172414 -0.397260 -0.420690 -0.489655 -0.331950 T001 \n", + "\n", + "[5 rows x 27 columns]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## pandas.DataFrame.set\n", + "\n", + "* Input: readings (dataframe with turbine_id)\n", + "* Output: readings (dataframe with turbine_id and timestamp)\n", + "* Effect: timestamp has been set as a readings column" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "step = 7\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...1819202122232425turbine_idtimestamp
0-0.235639-0.242673-0.328638-0.177022-0.352872-0.192488-0.331776-1.000000-1.000000-1.000000...-0.227586-0.227586-0.319728-0.186207-0.369863-0.186207-0.337931-0.261411T0012013-01-10 00:00:00
1-0.181712-0.092614-0.133803-0.029308-0.216882-0.150235-0.404206-0.999959-0.999958-0.999959...-0.186207-0.117241-0.115646-0.062069-0.232877-0.144828-0.406897-0.170124T0012013-01-10 00:10:00
2-0.038687-0.141852-0.039906-0.071512-0.348183-0.208920-0.355140-0.999911-0.999917-0.999916...-0.048276-0.144828-0.047619-0.075862-0.356164-0.227586-0.365517-0.165975T0012013-01-10 00:20:00
3-0.390387-0.273154-0.255869-0.146542-0.249707-0.164319-0.352804-0.999881-0.999883-0.999881...-0.393103-0.282759-0.238095-0.131034-0.260274-0.158621-0.365517-0.259336T0012013-01-10 00:30:00
4-0.366940-0.268464-0.258216-0.160610-0.378664-0.417840-0.490654-0.999849-0.999849-0.999846...-0.365517-0.282759-0.224490-0.172414-0.397260-0.420690-0.489655-0.331950T0012013-01-10 00:40:00
\n", + "

5 rows × 28 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 \\\n", + "0 -0.235639 -0.242673 -0.328638 -0.177022 -0.352872 -0.192488 -0.331776 \n", + "1 -0.181712 -0.092614 -0.133803 -0.029308 -0.216882 -0.150235 -0.404206 \n", + "2 -0.038687 -0.141852 -0.039906 -0.071512 -0.348183 -0.208920 -0.355140 \n", + "3 -0.390387 -0.273154 -0.255869 -0.146542 -0.249707 -0.164319 -0.352804 \n", + "4 -0.366940 -0.268464 -0.258216 -0.160610 -0.378664 -0.417840 -0.490654 \n", + "\n", + " 7 8 9 ... 18 19 20 21 \\\n", + "0 -1.000000 -1.000000 -1.000000 ... -0.227586 -0.227586 -0.319728 -0.186207 \n", + "1 -0.999959 -0.999958 -0.999959 ... -0.186207 -0.117241 -0.115646 -0.062069 \n", + "2 -0.999911 -0.999917 -0.999916 ... -0.048276 -0.144828 -0.047619 -0.075862 \n", + "3 -0.999881 -0.999883 -0.999881 ... -0.393103 -0.282759 -0.238095 -0.131034 \n", + "4 -0.999849 -0.999849 -0.999846 ... -0.365517 -0.282759 -0.224490 -0.172414 \n", + "\n", + " 22 23 24 25 turbine_id timestamp \n", + "0 -0.369863 -0.186207 -0.337931 -0.261411 T001 2013-01-10 00:00:00 \n", + "1 -0.232877 -0.144828 -0.406897 -0.170124 T001 2013-01-10 00:10:00 \n", + "2 -0.356164 -0.227586 -0.365517 -0.165975 T001 2013-01-10 00:20:00 \n", + "3 -0.260274 -0.158621 -0.365517 -0.259336 T001 2013-01-10 00:30:00 \n", + "4 -0.397260 -0.420690 -0.489655 -0.331950 T001 2013-01-10 00:40:00 \n", + "\n", + "[5 rows x 28 columns]" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences\n", + "\n", + "* Input: X, readings (dataframe with turbine_id and timestamp)\n", + "* Output: X\n", + "* Effect: X has been converted to a 3d numpy array that contains 1 matrix of shape\n", + " (window_size x num_signals) for each one of the target times." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'window_size': 24, 'cutoff_time': 'cutoff_time', 'time_index': 'timestamp'}" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipeline._pipeline.get_hyperparameters()[\n", + " 'mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1']" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "step = 8\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(51121, 28)" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(353,)" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['y'].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(353, 24, 26)" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['X'].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[-0.66002345, -0.57327081, -0.64084507, -0.57796014, -0.6014068 ,\n", + " -0.56103286, -0.55140187, -0.9928135 , -0.99291267, -0.99315058,\n", + " -0.99304288, -0.99346346, -0.99352632, -0.99395333, -0.42553191,\n", + " -0.41772152, -0.58730159, -0.35996294, -0.66896552, -0.57241379,\n", + " -0.61904762, -0.5862069 , -0.60273973, -0.55862069, -0.55862069,\n", + " -0.59751037],\n", + " [-0.2989449 , -0.38569754, -0.48591549, -0.47713951, -0.66705744,\n", + " -0.5915493 , -0.77336449, -0.99278389, -0.9928852 , -0.99312701,\n", + " -0.99301988, -0.9934481 , -0.9935075 , -0.9939459 , -0.39361702,\n", + " -0.40506329, -0.54285714, -0.35992014, -0.40689655, -0.42068966,\n", + " -0.46938776, -0.48965517, -0.67123288, -0.5862069 , -0.83448276,\n", + " -0.5560166 ],\n", + " [-0.33645955, -0.40679953, -0.39906103, -0.38569754, -0.56154748,\n", + " -0.43192488, -0.45560748, -0.99275498, -0.9928584 , -0.99310017,\n", + " -0.99299431, -0.99342739, -0.99348349, -0.99392294, -0.29787234,\n", + " -0.3164557 , -0.49206349, -0.35986854, -0.42068966, -0.43448276,\n", + " -0.40136054, -0.43448276, -0.56164384, -0.47586207, -0.51724138,\n", + " -0.46473029],\n", + " [-0.36928488, -0.41148886, -0.51173709, -0.47010551, -0.54982415,\n", + " -0.48122066, -0.51869159, -0.99272467, -0.9928316 , -0.99307791,\n", + " -0.99297067, -0.99340669, -0.99346079, -0.99390066, -0.29787234,\n", + " -0.35443038, -0.49206349, -0.35981854, -0.39310345, -0.43448276,\n", + " -0.49659864, -0.47586207, -0.56164384, -0.50344828, -0.53103448,\n", + " -0.4813278 ],\n", + " [-0.20750293, -0.35287222, -0.37089202, 
-0.2989449 , -0.32473623,\n", + " -0.37793427, -0.45794393, -0.99269435, -0.99280347, -0.99305173,\n", + " -0.99294447, -0.9933793 , -0.99343419, -0.9938777 , -0.32978723,\n", + " -0.39240506, -0.49206349, -0.35976314, -0.39310345, -0.40689655,\n", + " -0.41496599, -0.42068966, -0.42465753, -0.42068966, -0.51724138,\n", + " -0.4253112 ],\n", + " [-0.55685815, -0.60375147, -0.64084507, -0.54513482, -0.55685815,\n", + " -0.58215962, -0.63785047, -0.99267179, -0.99278404, -0.99303471,\n", + " -0.99292338, -0.99335793, -0.99341472, -0.99386014, -0.29787234,\n", + " -0.34177215, -0.51746032, -0.35972353, -0.54482759, -0.5862069 ,\n", + " -0.60544218, -0.53103448, -0.54794521, -0.57241379, -0.62758621,\n", + " -0.58921162],\n", + " [-0.66705744, -0.67643611, -0.69014085, -0.64361079, -0.74443142,\n", + " -0.7370892 , -0.7546729 , -0.99265487, -0.99276863, -0.99302096,\n", + " -0.9929074 , -0.99334657, -0.9934024 , -0.99384934, -0.39361702,\n", + " -0.48101266, -0.51746032, -0.35969533, -0.65517241, -0.66896552,\n", + " -0.67346939, -0.64137931, -0.75342466, -0.72413793, -0.76551724,\n", + " -0.70746888],\n", + " [-0.53341149, -0.60375147, -0.63849765, -0.61547479, -0.71395076,\n", + " -0.70187793, -0.72897196, -0.99263231, -0.99275054, -0.99300394,\n", + " -0.99289014, -0.99333255, -0.99338877, -0.9938365 , -0.38297872,\n", + " -0.37974684, -0.54920635, -0.35966173, -0.54482759, -0.6137931 ,\n", + " -0.60544218, -0.6137931 , -0.69863014, -0.69655172, -0.72413793,\n", + " -0.65145228],\n", + " [-0.44196952, -0.4021102 , -0.49295775, -0.49355217, -0.62719812,\n", + " -0.62676056, -0.71728972, -0.99260481, -0.99272173, -0.99298103,\n", + " -0.99286777, -0.99331518, -0.9933719 , -0.99382367, -0.38297872,\n", + " -0.4556962 , -0.54285714, -0.35961793, -0.44827586, -0.39310345,\n", + " -0.4829932 , -0.50344828, -0.63013699, -0.62758621, -0.72413793,\n", + " -0.54564315],\n", + " [-0.46307151, -0.38100821, -0.35446009, -0.44900352, -0.50293083,\n", + " -0.4741784 , -0.63317757, -0.99257731, -0.99269226, -0.99295157,\n", + " -0.99284285, -0.99329247, -0.9933479 , -0.99380612, -0.28723404,\n", + " -0.3164557 , -0.47301587, -0.35956633, -0.44827586, -0.37931034,\n", + " -0.34693878, -0.44827586, -0.52054795, -0.47586207, -0.62758621,\n", + " -0.46473029],\n", + " [-0.26611958, -0.26611958, -0.29107981, -0.34349355, -0.3950762 ,\n", + " -0.29577465, -0.43925234, -0.9925477 , -0.99266278, -0.99292211,\n", + " -0.99281601, -0.99326575, -0.99331805, -0.99378316, -0.28723404,\n", + " -0.39240506, -0.46031746, -0.35950873, -0.40689655, -0.37931034,\n", + " -0.34693878, -0.40689655, -0.43835616, -0.35172414, -0.51724138,\n", + " -0.40248963],\n", + " [-0.46307151, -0.35990621, -0.43192488, -0.36928488, -0.47245018,\n", + " -0.44600939, -0.41121495, -0.99252091, -0.9926333 , -0.99289592,\n", + " -0.99278789, -0.99324104, -0.99329275, -0.99375547, -0.28723404,\n", + " -0.43037975, -0.46666667, -0.35945292, -0.46206897, -0.37931034,\n", + " -0.41496599, -0.37931034, -0.47945205, -0.44827586, -0.42068966,\n", + " -0.42116183],\n", + " [-0.44431419, -0.4021102 , -0.38732394, -0.3059789 , -0.35990621,\n", + " -0.28403756, -0.40420561, -0.99249341, -0.99260583, -0.99287039,\n", + " -0.99275913, -0.99321298, -0.99326226, -0.99372846, -0.30851064,\n", + " -0.4556962 , -0.46031746, -0.35939572, -0.44827586, -0.42068966,\n", + " -0.42857143, -0.36551724, -0.4109589 , -0.33793103, -0.43448276,\n", + " -0.406639 ],\n", + " [-0.43962485, -0.36459555, -0.35211268, -0.35052755, -0.44665885,\n", + " -0.34741784, -0.44859813, 
-0.99246592, -0.99257703, -0.99284028,\n", + " -0.99273037, -0.99318693, -0.99323176, -0.99370279, -0.28723404,\n", + " -0.36708861, -0.48571429, -0.35933712, -0.44827586, -0.39310345,\n", + " -0.33333333, -0.36551724, -0.45205479, -0.33793103, -0.46206897,\n", + " -0.39211618],\n", + " [-0.2028136 , -0.25439625, -0.30751174, -0.3130129 , -0.37631887,\n", + " -0.3685446 , -0.46495327, -0.99243067, -0.99254152, -0.9928082 ,\n", + " -0.99269906, -0.99315821, -0.99320322, -0.99367781, -0.27659574,\n", + " -0.32911392, -0.47301587, -0.35927332, -0.29655172, -0.25517241,\n", + " -0.29251701, -0.31034483, -0.39726027, -0.37931034, -0.47586207,\n", + " -0.33817427],\n", + " [-0.23329426, -0.27080891, -0.31924883, -0.24736225, -0.35521688,\n", + " -0.33098592, -0.4182243 , -0.99239753, -0.99250668, -0.99277743,\n", + " -0.99266518, -0.99312815, -0.99317272, -0.99365012, -0.26595745,\n", + " -0.40506329, -0.46666667, -0.35920811, -0.33793103, -0.26896552,\n", + " -0.31972789, -0.25517241, -0.36986301, -0.33793103, -0.42068966,\n", + " -0.32365145],\n", + " [-0.12778429, -0.11137163, -0.10798122, -0.05275498, -0.25439625,\n", + " -0.23474178, -0.28271028, -0.99236228, -0.99247117, -0.99274143,\n", + " -0.99263131, -0.99309876, -0.99314028, -0.99362108, -0.24468085,\n", + " -0.32911392, -0.43492063, -0.35914011, -0.29655172, -0.25517241,\n", + " -0.21088435, -0.25517241, -0.38356164, -0.29655172, -0.39310345,\n", + " -0.29460581],\n", + " [-0.14185229, -0.2028136 , -0.31690141, -0.17467761, -0.24970692,\n", + " -0.25117371, -0.37383178, -0.9923242 , -0.99243567, -0.99271066,\n", + " -0.9925968 , -0.9930667 , -0.99310849, -0.99359204, -0.22340426,\n", + " -0.3164557 , -0.41587302, -0.35907171, -0.24137931, -0.25517241,\n", + " -0.31972789, -0.24137931, -0.32876712, -0.31034483, -0.39310345,\n", + " -0.29045643],\n", + " [-0.4021102 , -0.32708089, -0.33802817, -0.28018757, -0.3950762 ,\n", + " -0.40140845, -0.48364486, -0.99229459, -0.99240284, -0.99268055,\n", + " -0.99256421, -0.99303731, -0.99308059, -0.99356773, -0.25531915,\n", + " -0.29113924, -0.40952381, -0.35901131, -0.40689655, -0.31034483,\n", + " -0.33333333, -0.28275862, -0.38356164, -0.39310345, -0.48965517,\n", + " -0.37344398],\n", + " [-0.27549824, -0.3059789 , -0.37089202, -0.20046893, -0.34818288,\n", + " -0.33802817, -0.42056075, -0.99225863, -0.99237068, -0.99265109,\n", + " -0.99252778, -0.99300725, -0.99305075, -0.99354072, -0.28723404,\n", + " -0.41772152, -0.48571429, -0.3589459 , -0.28275862, -0.32413793,\n", + " -0.34693878, -0.2 , -0.36986301, -0.35172414, -0.43448276,\n", + " -0.32157676],\n", + " [-0.30832356, -0.3059789 , -0.3286385 , -0.31066823, -0.32473623,\n", + " -0.34741784, -0.38785047, -0.99222479, -0.99233786, -0.99262032,\n", + " -0.9924971 , -0.99297519, -0.9930209 , -0.99351168, -0.28723404,\n", + " -0.3164557 , -0.47936508, -0.3588813 , -0.32413793, -0.31034483,\n", + " -0.31972789, -0.32413793, -0.32876712, -0.35172414, -0.39310345,\n", + " -0.32987552],\n", + " [-0.33645955, -0.2098476 , -0.24413146, -0.2919109 , -0.41383353,\n", + " -0.41079812, -0.46495327, -0.99219025, -0.99230168, -0.99258563,\n", + " -0.99246579, -0.99294781, -0.99299365, -0.9934867 , -0.24468085,\n", + " -0.29113924, -0.42857143, -0.3588177 , -0.31034483, -0.24137931,\n", + " -0.23809524, -0.31034483, -0.42465753, -0.40689655, -0.47586207,\n", + " -0.34024896],\n", + " [-0.24267292, -0.15357562, -0.19248826, -0.13950762, -0.35052755,\n", + " -0.30046948, -0.37616822, -0.99215358, -0.99226215, -0.99254831,\n", + " -0.99242872, 
-0.99291708, -0.99296121, -0.99345766, -0.22340426,\n", + " -0.25316456, -0.42857143, -0.3587457 , -0.26896552, -0.17241379,\n", + " -0.18367347, -0.1862069 , -0.35616438, -0.29655172, -0.39310345,\n", + " -0.25311203],\n", + " [-0.2989449 , -0.26377491, -0.27699531, -0.15592028, -0.34583822,\n", + " -0.34976526, -0.48831776, -0.99211763, -0.99222731, -0.99251493,\n", + " -0.99239038, -0.99288636, -0.99293072, -0.99343267, -0.20212766,\n", + " -0.24050633, -0.3968254 , -0.35867929, -0.28275862, -0.26896552,\n", + " -0.26530612, -0.15862069, -0.35616438, -0.33793103, -0.47586207,\n", + " -0.31120332]])" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['X'][0]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## keras.Sequential.DoubleLSTMTimeSeriesClassifier\n", + "\n", + "* Input: X, y\n", + "* Output: \n", + "* Effect: DoubleLSTM has been fitted." + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2022-01-18 05:32:48.464559: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2022-01-18 05:32:48.495873: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fba31d9b0c0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n", + "2022-01-18 05:32:48.495892: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version\n" + ] + } + ], + "source": [ + "step = 9\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorials/pipelines/lstm_regressor_with_unstack.ipynb b/tutorials/pipelines/lstm_regressor_with_unstack.ipynb new file mode 100644 index 0000000..516c6da --- /dev/null +++ b/tutorials/pipelines/lstm_regressor_with_unstack.ipynb @@ -0,0 +1,2499 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "488d2ccc", + "metadata": {}, + "source": [ + "# lstm_regressor_with_unstack" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "caf9a9ef", + "metadata": {}, + "outputs": [], + "source": [ + "from draco.demo import load_demo\n", + "\n", + "train_target_times, test_target_times, readings = load_demo()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "80315927", + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_name = 'lstm_regressor_with_unstack'" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "1073a88a", + "metadata": {}, + "outputs": [], + "source": [ + "from draco import DracoPipeline\n", + "\n", + "pipeline = DracoPipeline(pipeline_name)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c6cb15d", + "metadata": {}, + "outputs": [], + "source": [ + 
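"# List the primitives that make up this pipeline template\n", +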
"pipeline.template['primitives']" + ] + }, + { + "cell_type": "markdown", + "id": "26bbb52d", + "metadata": {}, + "source": [ + "# Step by Step execution" + ] + }, + { + "cell_type": "markdown", + "id": "3f12ee16", + "metadata": {}, + "source": [ + "## Input Data" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "a2396b1c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idtimestampsignal_idvalue
012013-01-12 00:10:00operational setting 1-0.0007
112013-01-12 00:20:00operational setting 10.0019
212013-01-12 00:30:00operational setting 1-0.0043
312013-01-12 00:40:00operational setting 10.0007
412013-01-12 00:50:00operational setting 1-0.0019
\n", + "
" + ], + "text/plain": [ + " turbine_id timestamp signal_id value\n", + "0 1 2013-01-12 00:10:00 operational setting 1 -0.0007\n", + "1 1 2013-01-12 00:20:00 operational setting 1 0.0019\n", + "2 1 2013-01-12 00:30:00 operational setting 1 -0.0043\n", + "3 1 2013-01-12 00:40:00 operational setting 1 0.0007\n", + "4 1 2013-01-12 00:50:00 operational setting 1 -0.0019" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "readings.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "3cd80f1f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idcutoff_timetarget
012013-01-12 04:20:00166
112013-01-12 04:30:00165
212013-01-12 04:40:00164
312013-01-12 04:50:00163
412013-01-12 05:00:00162
\n", + "
" + ], + "text/plain": [ + " turbine_id cutoff_time target\n", + "0 1 2013-01-12 04:20:00 166\n", + "1 1 2013-01-12 04:30:00 165\n", + "2 1 2013-01-12 04:40:00 164\n", + "3 1 2013-01-12 04:50:00 163\n", + "4 1 2013-01-12 05:00:00 162" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train_target_times.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "6a759b57", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idcutoff_timetarget
012013-01-13 13:10:00112.0
122013-01-14 08:00:0098.0
232013-01-14 02:50:0069.0
342013-01-14 01:10:0082.0
452013-01-14 13:10:0091.0
\n", + "
" + ], + "text/plain": [ + " turbine_id cutoff_time target\n", + "0 1 2013-01-13 13:10:00 112.0\n", + "1 2 2013-01-14 08:00:00 98.0\n", + "2 3 2013-01-14 02:50:00 69.0\n", + "3 4 2013-01-14 01:10:00 82.0\n", + "4 5 2013-01-14 13:10:00 91.0" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_target_times.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "feb3daa6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "training shape (18131, 3)\n", + "testing shape (100, 3)\n" + ] + } + ], + "source": [ + "print(\"training shape\", train_target_times.shape)\n", + "print(\"testing shape\", test_target_times.shape)" + ] + }, + { + "cell_type": "markdown", + "id": "a956f746", + "metadata": {}, + "source": [ + "## Data Preparation (part of Draco Pipeline)\n", + "\n", + "* Input: target_times, readings, turbines\n", + "* Output: X, y, readings, turbines\n", + "* Effect: target_times has been split into X and y" + ] + }, + { + "cell_type": "markdown", + "id": "a813a966", + "metadata": {}, + "source": [ + "## mlblocks.MLPipeline\n", + "\n", + "### pandas.DataFrame.resample\n", + "\n", + "* Input: readings\n", + "* Output: readings (resampled)\n", + "* Effect: readings have been resampled to the indicated resample rule and turbine_id,\n", + " signal_id and timestamp have been set as a multi-index\n", + " \n", + "### pandas.DataFrame.unstack\n", + "\n", + "* Input: readings (resampled)\n", + "* Output: readings (unstacked)\n", + "* Effect: readings have been unstacked" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "bb00b3b8", + "metadata": {}, + "outputs": [], + "source": [ + "context = pipeline.fit(train_target_times, readings, output_=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "381e361d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'X', 'y'])" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "b41f13c1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idtimestampvalue_operational setting 1value_operational setting 2value_operational setting 3value_sensor measurement 1value_sensor measurement 10value_sensor measurement 11value_sensor measurement 12value_sensor measurement 13...value_sensor measurement 2value_sensor measurement 20value_sensor measurement 21value_sensor measurement 3value_sensor measurement 4value_sensor measurement 5value_sensor measurement 6value_sensor measurement 7value_sensor measurement 8value_sensor measurement 9
012013-01-12 00:10:00-0.0007-0.0004100.0518.671.347.47521.662388.02...641.8239.0623.41901589.701400.6014.6221.61554.362388.069046.19
112013-01-12 00:20:000.0019-0.0003100.0518.671.347.49522.282388.07...642.1539.0023.42361591.821403.1414.6221.61553.752388.049044.07
212013-01-12 00:30:00-0.00430.0003100.0518.671.347.27522.422388.03...642.3538.9523.34421587.991404.2014.6221.61554.262388.089052.94
312013-01-12 00:40:000.00070.0000100.0518.671.347.13522.862388.08...642.3538.8823.37391582.791401.8714.6221.61554.452388.119049.48
412013-01-12 00:50:00-0.0019-0.0002100.0518.671.347.28522.192388.04...642.3738.9023.40441582.851406.2214.6221.61554.002388.069055.15
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " turbine_id timestamp value_operational setting 1 \\\n", + "0 1 2013-01-12 00:10:00 -0.0007 \n", + "1 1 2013-01-12 00:20:00 0.0019 \n", + "2 1 2013-01-12 00:30:00 -0.0043 \n", + "3 1 2013-01-12 00:40:00 0.0007 \n", + "4 1 2013-01-12 00:50:00 -0.0019 \n", + "\n", + " value_operational setting 2 value_operational setting 3 \\\n", + "0 -0.0004 100.0 \n", + "1 -0.0003 100.0 \n", + "2 0.0003 100.0 \n", + "3 0.0000 100.0 \n", + "4 -0.0002 100.0 \n", + "\n", + " value_sensor measurement 1 value_sensor measurement 10 \\\n", + "0 518.67 1.3 \n", + "1 518.67 1.3 \n", + "2 518.67 1.3 \n", + "3 518.67 1.3 \n", + "4 518.67 1.3 \n", + "\n", + " value_sensor measurement 11 value_sensor measurement 12 \\\n", + "0 47.47 521.66 \n", + "1 47.49 522.28 \n", + "2 47.27 522.42 \n", + "3 47.13 522.86 \n", + "4 47.28 522.19 \n", + "\n", + " value_sensor measurement 13 ... value_sensor measurement 2 \\\n", + "0 2388.02 ... 641.82 \n", + "1 2388.07 ... 642.15 \n", + "2 2388.03 ... 642.35 \n", + "3 2388.08 ... 642.35 \n", + "4 2388.04 ... 642.37 \n", + "\n", + " value_sensor measurement 20 value_sensor measurement 21 \\\n", + "0 39.06 23.4190 \n", + "1 39.00 23.4236 \n", + "2 38.95 23.3442 \n", + "3 38.88 23.3739 \n", + "4 38.90 23.4044 \n", + "\n", + " value_sensor measurement 3 value_sensor measurement 4 \\\n", + "0 1589.70 1400.60 \n", + "1 1591.82 1403.14 \n", + "2 1587.99 1404.20 \n", + "3 1582.79 1401.87 \n", + "4 1582.85 1406.22 \n", + "\n", + " value_sensor measurement 5 value_sensor measurement 6 \\\n", + "0 14.62 21.61 \n", + "1 14.62 21.61 \n", + "2 14.62 21.61 \n", + "3 14.62 21.61 \n", + "4 14.62 21.61 \n", + "\n", + " value_sensor measurement 7 value_sensor measurement 8 \\\n", + "0 554.36 2388.06 \n", + "1 553.75 2388.04 \n", + "2 554.26 2388.08 \n", + "3 554.45 2388.11 \n", + "4 554.00 2388.06 \n", + "\n", + " value_sensor measurement 9 \n", + "0 9046.19 \n", + "1 9044.07 \n", + "2 9052.94 \n", + "3 9049.48 \n", + "4 9055.15 \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "id": "5f521fd3", + "metadata": {}, + "source": [ + "## pandas.DataFrame.pop\n", + "\n", + "* Input: readings (unstacked)\n", + "* Output: readings (without turbine_id), turbine_id\n", + "* Effect: turbine_id has been popped from readings" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "bb0bac75", + "metadata": {}, + "outputs": [], + "source": [ + "step = 1\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "1009407e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'X', 'y', 'turbine_id'])" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "93104c3b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 1\n", + "1 1\n", + "2 1\n", + "3 1\n", + "4 1\n", + "Name: turbine_id, dtype: int64" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['turbine_id'].head()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "83855579", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timestampvalue_operational setting 1value_operational setting 2value_operational setting 3value_sensor measurement 1value_sensor measurement 10value_sensor measurement 11value_sensor measurement 12value_sensor measurement 13value_sensor measurement 14...value_sensor measurement 2value_sensor measurement 20value_sensor measurement 21value_sensor measurement 3value_sensor measurement 4value_sensor measurement 5value_sensor measurement 6value_sensor measurement 7value_sensor measurement 8value_sensor measurement 9
02013-01-12 00:10:00-0.0007-0.0004100.0518.671.347.47521.662388.028138.62...641.8239.0623.41901589.701400.6014.6221.61554.362388.069046.19
12013-01-12 00:20:000.0019-0.0003100.0518.671.347.49522.282388.078131.49...642.1539.0023.42361591.821403.1414.6221.61553.752388.049044.07
22013-01-12 00:30:00-0.00430.0003100.0518.671.347.27522.422388.038133.23...642.3538.9523.34421587.991404.2014.6221.61554.262388.089052.94
32013-01-12 00:40:000.00070.0000100.0518.671.347.13522.862388.088133.83...642.3538.8823.37391582.791401.8714.6221.61554.452388.119049.48
42013-01-12 00:50:00-0.0019-0.0002100.0518.671.347.28522.192388.048133.80...642.3738.9023.40441582.851406.2214.6221.61554.002388.069055.15
\n", + "

5 rows × 25 columns

\n", + "
" + ], + "text/plain": [ + " timestamp value_operational setting 1 \\\n", + "0 2013-01-12 00:10:00 -0.0007 \n", + "1 2013-01-12 00:20:00 0.0019 \n", + "2 2013-01-12 00:30:00 -0.0043 \n", + "3 2013-01-12 00:40:00 0.0007 \n", + "4 2013-01-12 00:50:00 -0.0019 \n", + "\n", + " value_operational setting 2 value_operational setting 3 \\\n", + "0 -0.0004 100.0 \n", + "1 -0.0003 100.0 \n", + "2 0.0003 100.0 \n", + "3 0.0000 100.0 \n", + "4 -0.0002 100.0 \n", + "\n", + " value_sensor measurement 1 value_sensor measurement 10 \\\n", + "0 518.67 1.3 \n", + "1 518.67 1.3 \n", + "2 518.67 1.3 \n", + "3 518.67 1.3 \n", + "4 518.67 1.3 \n", + "\n", + " value_sensor measurement 11 value_sensor measurement 12 \\\n", + "0 47.47 521.66 \n", + "1 47.49 522.28 \n", + "2 47.27 522.42 \n", + "3 47.13 522.86 \n", + "4 47.28 522.19 \n", + "\n", + " value_sensor measurement 13 value_sensor measurement 14 ... \\\n", + "0 2388.02 8138.62 ... \n", + "1 2388.07 8131.49 ... \n", + "2 2388.03 8133.23 ... \n", + "3 2388.08 8133.83 ... \n", + "4 2388.04 8133.80 ... \n", + "\n", + " value_sensor measurement 2 value_sensor measurement 20 \\\n", + "0 641.82 39.06 \n", + "1 642.15 39.00 \n", + "2 642.35 38.95 \n", + "3 642.35 38.88 \n", + "4 642.37 38.90 \n", + "\n", + " value_sensor measurement 21 value_sensor measurement 3 \\\n", + "0 23.4190 1589.70 \n", + "1 23.4236 1591.82 \n", + "2 23.3442 1587.99 \n", + "3 23.3739 1582.79 \n", + "4 23.4044 1582.85 \n", + "\n", + " value_sensor measurement 4 value_sensor measurement 5 \\\n", + "0 1400.60 14.62 \n", + "1 1403.14 14.62 \n", + "2 1404.20 14.62 \n", + "3 1401.87 14.62 \n", + "4 1406.22 14.62 \n", + "\n", + " value_sensor measurement 6 value_sensor measurement 7 \\\n", + "0 21.61 554.36 \n", + "1 21.61 553.75 \n", + "2 21.61 554.26 \n", + "3 21.61 554.45 \n", + "4 21.61 554.00 \n", + "\n", + " value_sensor measurement 8 value_sensor measurement 9 \n", + "0 2388.06 9046.19 \n", + "1 2388.04 9044.07 \n", + "2 2388.08 9052.94 \n", + "3 2388.11 9049.48 \n", + "4 2388.06 9055.15 \n", + "\n", + "[5 rows x 25 columns]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "id": "a43ffbb1", + "metadata": {}, + "source": [ + "## pandas.DataFrame.pop\n", + "\n", + "* Input: readings (without turbine_id)\n", + "* Output: readings (without timestamp), timestamp\n", + "* Effect: timestamp has been popped from readings" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "ebcad5cd", + "metadata": {}, + "outputs": [], + "source": [ + "step = 2\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "d497ab07", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'X', 'y', 'timestamp'])" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "2c3bfa0b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2013-01-12 00:10:00\n", + "1 2013-01-12 00:20:00\n", + "2 2013-01-12 00:30:00\n", + "3 2013-01-12 00:40:00\n", + "4 2013-01-12 00:50:00\n", + "Name: timestamp, dtype: datetime64[ns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['timestamp'].head()" + ] + }, + { + 
"cell_type": "code", + "execution_count": 15, + "id": "3c837b44", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
value_operational setting 1value_operational setting 2value_operational setting 3value_sensor measurement 1value_sensor measurement 10value_sensor measurement 11value_sensor measurement 12value_sensor measurement 13value_sensor measurement 14value_sensor measurement 15...value_sensor measurement 2value_sensor measurement 20value_sensor measurement 21value_sensor measurement 3value_sensor measurement 4value_sensor measurement 5value_sensor measurement 6value_sensor measurement 7value_sensor measurement 8value_sensor measurement 9
0-0.0007-0.0004100.0518.671.347.47521.662388.028138.628.4195...641.8239.0623.41901589.701400.6014.6221.61554.362388.069046.19
10.0019-0.0003100.0518.671.347.49522.282388.078131.498.4318...642.1539.0023.42361591.821403.1414.6221.61553.752388.049044.07
2-0.00430.0003100.0518.671.347.27522.422388.038133.238.4178...642.3538.9523.34421587.991404.2014.6221.61554.262388.089052.94
30.00070.0000100.0518.671.347.13522.862388.088133.838.3682...642.3538.8823.37391582.791401.8714.6221.61554.452388.119049.48
4-0.0019-0.0002100.0518.671.347.28522.192388.048133.808.4294...642.3738.9023.40441582.851406.2214.6221.61554.002388.069055.15
\n", + "

5 rows × 24 columns

\n", + "
" + ], + "text/plain": [ + " value_operational setting 1 value_operational setting 2 \\\n", + "0 -0.0007 -0.0004 \n", + "1 0.0019 -0.0003 \n", + "2 -0.0043 0.0003 \n", + "3 0.0007 0.0000 \n", + "4 -0.0019 -0.0002 \n", + "\n", + " value_operational setting 3 value_sensor measurement 1 \\\n", + "0 100.0 518.67 \n", + "1 100.0 518.67 \n", + "2 100.0 518.67 \n", + "3 100.0 518.67 \n", + "4 100.0 518.67 \n", + "\n", + " value_sensor measurement 10 value_sensor measurement 11 \\\n", + "0 1.3 47.47 \n", + "1 1.3 47.49 \n", + "2 1.3 47.27 \n", + "3 1.3 47.13 \n", + "4 1.3 47.28 \n", + "\n", + " value_sensor measurement 12 value_sensor measurement 13 \\\n", + "0 521.66 2388.02 \n", + "1 522.28 2388.07 \n", + "2 522.42 2388.03 \n", + "3 522.86 2388.08 \n", + "4 522.19 2388.04 \n", + "\n", + " value_sensor measurement 14 value_sensor measurement 15 ... \\\n", + "0 8138.62 8.4195 ... \n", + "1 8131.49 8.4318 ... \n", + "2 8133.23 8.4178 ... \n", + "3 8133.83 8.3682 ... \n", + "4 8133.80 8.4294 ... \n", + "\n", + " value_sensor measurement 2 value_sensor measurement 20 \\\n", + "0 641.82 39.06 \n", + "1 642.15 39.00 \n", + "2 642.35 38.95 \n", + "3 642.35 38.88 \n", + "4 642.37 38.90 \n", + "\n", + " value_sensor measurement 21 value_sensor measurement 3 \\\n", + "0 23.4190 1589.70 \n", + "1 23.4236 1591.82 \n", + "2 23.3442 1587.99 \n", + "3 23.3739 1582.79 \n", + "4 23.4044 1582.85 \n", + "\n", + " value_sensor measurement 4 value_sensor measurement 5 \\\n", + "0 1400.60 14.62 \n", + "1 1403.14 14.62 \n", + "2 1404.20 14.62 \n", + "3 1401.87 14.62 \n", + "4 1406.22 14.62 \n", + "\n", + " value_sensor measurement 6 value_sensor measurement 7 \\\n", + "0 21.61 554.36 \n", + "1 21.61 553.75 \n", + "2 21.61 554.26 \n", + "3 21.61 554.45 \n", + "4 21.61 554.00 \n", + "\n", + " value_sensor measurement 8 value_sensor measurement 9 \n", + "0 2388.06 9046.19 \n", + "1 2388.04 9044.07 \n", + "2 2388.08 9052.94 \n", + "3 2388.11 9049.48 \n", + "4 2388.06 9055.15 \n", + "\n", + "[5 rows x 24 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "id": "78cc4b36", + "metadata": {}, + "source": [ + "## sklearn.impute.SimpleImputer\n", + "\n", + "* Input: readings (unstacked, no turbine_id, no timestamp)\n", + "* Output: readings (imputed, numpy array)\n", + "* Effect: readings have been imputed and converted to numpy array" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "3ad08e01", + "metadata": {}, + "outputs": [], + "source": [ + "step = 3\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "19c4ee50", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "af5f9dc1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[-7.00000e-04, -4.00000e-04, 1.00000e+02, 5.18670e+02,\n", + " 1.30000e+00, 4.74700e+01, 5.21660e+02, 2.38802e+03,\n", + " 8.13862e+03, 8.41950e+00, 3.00000e-02, 3.92000e+02,\n", + " 2.38800e+03, 1.00000e+02, 6.41820e+02, 3.90600e+01,\n", + " 2.34190e+01, 1.58970e+03, 1.40060e+03, 1.46200e+01,\n", + " 2.16100e+01, 5.54360e+02, 2.38806e+03, 
9.04619e+03],\n", + " [ 1.90000e-03, -3.00000e-04, 1.00000e+02, 5.18670e+02,\n", + " 1.30000e+00, 4.74900e+01, 5.22280e+02, 2.38807e+03,\n", + " 8.13149e+03, 8.43180e+00, 3.00000e-02, 3.92000e+02,\n", + " 2.38800e+03, 1.00000e+02, 6.42150e+02, 3.90000e+01,\n", + " 2.34236e+01, 1.59182e+03, 1.40314e+03, 1.46200e+01,\n", + " 2.16100e+01, 5.53750e+02, 2.38804e+03, 9.04407e+03],\n", + " [-4.30000e-03, 3.00000e-04, 1.00000e+02, 5.18670e+02,\n", + " 1.30000e+00, 4.72700e+01, 5.22420e+02, 2.38803e+03,\n", + " 8.13323e+03, 8.41780e+00, 3.00000e-02, 3.90000e+02,\n", + " 2.38800e+03, 1.00000e+02, 6.42350e+02, 3.89500e+01,\n", + " 2.33442e+01, 1.58799e+03, 1.40420e+03, 1.46200e+01,\n", + " 2.16100e+01, 5.54260e+02, 2.38808e+03, 9.05294e+03],\n", + " [ 7.00000e-04, 0.00000e+00, 1.00000e+02, 5.18670e+02,\n", + " 1.30000e+00, 4.71300e+01, 5.22860e+02, 2.38808e+03,\n", + " 8.13383e+03, 8.36820e+00, 3.00000e-02, 3.92000e+02,\n", + " 2.38800e+03, 1.00000e+02, 6.42350e+02, 3.88800e+01,\n", + " 2.33739e+01, 1.58279e+03, 1.40187e+03, 1.46200e+01,\n", + " 2.16100e+01, 5.54450e+02, 2.38811e+03, 9.04948e+03],\n", + " [-1.90000e-03, -2.00000e-04, 1.00000e+02, 5.18670e+02,\n", + " 1.30000e+00, 4.72800e+01, 5.22190e+02, 2.38804e+03,\n", + " 8.13380e+03, 8.42940e+00, 3.00000e-02, 3.93000e+02,\n", + " 2.38800e+03, 1.00000e+02, 6.42370e+02, 3.89000e+01,\n", + " 2.34044e+01, 1.58285e+03, 1.40622e+03, 1.46200e+01,\n", + " 2.16100e+01, 5.54000e+02, 2.38806e+03, 9.05515e+03]])" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'][0:5]" + ] + }, + { + "cell_type": "markdown", + "id": "1e0df4b2", + "metadata": {}, + "source": [ + "## sklearn.preprocessing.MinMaxScaler\n", + "\n", + "* Input: readings (imputed, array)\n", + "* Output: readings (scaled, array)\n", + "* Effect: readings have been scaled to [-1, 1] range" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "f50662d2", + "metadata": {}, + "outputs": [], + "source": [ + "step = 4\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "37bf8d65", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "73c5d941", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[-0.08045977, -0.69230769, -1. , -1. , -1. ,\n", + " -0.22543353, 0.17159763, -0.58823529, -0.60078439, -0.2720277 ,\n", + " -1. , -0.33333333, -1. , -1. , -0.59411765,\n", + " 0.42635659, 0.40377157, -0.13682891, -0.38048616, -1. ,\n", + " 1. , 0.45249597, -0.49253731, -0.78048999],\n", + " [ 0.2183908 , -0.53846154, -1. , -1. , -1. ,\n", + " -0.20231214, 0.41617357, -0.44117647, -0.674373 , -0.17737591,\n", + " -1. , -0.33333333, -1. , -1. , -0.4 ,\n", + " 0.33333333, 0.41607597, -0.04825569, -0.29473329, -1. ,\n", + " 1. , 0.25603865, -0.55223881, -0.79951539],\n", + " [-0.49425287, 0.38461538, -1. , -1. , -1. ,\n", + " -0.4566474 , 0.47140039, -0.55882353, -0.65641449, -0.28510966,\n", + " -1. , -0.66666667, -1. , -1. , -0.28235294,\n", + " 0.25581395, 0.20369132, -0.2082724 , -0.25894666, -1. ,\n", + " 1. , 0.42028986, -0.43283582, -0.71991385],\n", + " [ 0.08045977, -0.07692308, -1. , -1. , -1. 
,\n", + " -0.61849711, 0.64497041, -0.41176471, -0.6502219 , -0.66679492,\n", + " -1. , -0.33333333, -1. , -1. , -0.28235294,\n", + " 0.14728682, 0.28313495, -0.42552747, -0.33760972, -1. ,\n", + " 1. , 0.48148148, -0.34328358, -0.75096473],\n", + " [-0.2183908 , -0.38461538, -1. , -1. , -1. ,\n", + " -0.44508671, 0.38067061, -0.52941176, -0.65053153, -0.19584456,\n", + " -1. , -0.16666667, -1. , -1. , -0.27058824,\n", + " 0.17829457, 0.36471847, -0.42302068, -0.19074949, -1. ,\n", + " 1. , 0.33655395, -0.49253731, -0.70008077]])" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'][0:5]" + ] + }, + { + "cell_type": "markdown", + "id": "e483b0ae", + "metadata": {}, + "source": [ + "## pandas.DataFrame\n", + "\n", + "* Input: readings (scaled, array)\n", + "* Output: readings (dataframe)\n", + "* Effect: readings have been converted into a dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "4722001e", + "metadata": {}, + "outputs": [], + "source": [ + "step = 5\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "34b5d2ca", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "011b9c51", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...14151617181920212223
0-0.080460-0.692308-1.0-1.0-1.0-0.2254340.171598-0.588235-0.600784-0.272028...-0.5941180.4263570.403772-0.136829-0.380486-1.01.00.452496-0.492537-0.780490
10.218391-0.538462-1.0-1.0-1.0-0.2023120.416174-0.441176-0.674373-0.177376...-0.4000000.3333330.416076-0.048256-0.294733-1.01.00.256039-0.552239-0.799515
2-0.4942530.384615-1.0-1.0-1.0-0.4566470.471400-0.558824-0.656414-0.285110...-0.2823530.2558140.203691-0.208272-0.258947-1.01.00.420290-0.432836-0.719914
30.080460-0.076923-1.0-1.0-1.0-0.6184970.644970-0.411765-0.650222-0.666795...-0.2823530.1472870.283135-0.425527-0.337610-1.01.00.481481-0.343284-0.750965
4-0.218391-0.384615-1.0-1.0-1.0-0.4450870.380671-0.529412-0.650532-0.195845...-0.2705880.1782950.364718-0.423021-0.190749-1.01.00.336554-0.492537-0.700081
\n", + "

5 rows × 24 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 7 8 \\\n", + "0 -0.080460 -0.692308 -1.0 -1.0 -1.0 -0.225434 0.171598 -0.588235 -0.600784 \n", + "1 0.218391 -0.538462 -1.0 -1.0 -1.0 -0.202312 0.416174 -0.441176 -0.674373 \n", + "2 -0.494253 0.384615 -1.0 -1.0 -1.0 -0.456647 0.471400 -0.558824 -0.656414 \n", + "3 0.080460 -0.076923 -1.0 -1.0 -1.0 -0.618497 0.644970 -0.411765 -0.650222 \n", + "4 -0.218391 -0.384615 -1.0 -1.0 -1.0 -0.445087 0.380671 -0.529412 -0.650532 \n", + "\n", + " 9 ... 14 15 16 17 18 19 20 \\\n", + "0 -0.272028 ... -0.594118 0.426357 0.403772 -0.136829 -0.380486 -1.0 1.0 \n", + "1 -0.177376 ... -0.400000 0.333333 0.416076 -0.048256 -0.294733 -1.0 1.0 \n", + "2 -0.285110 ... -0.282353 0.255814 0.203691 -0.208272 -0.258947 -1.0 1.0 \n", + "3 -0.666795 ... -0.282353 0.147287 0.283135 -0.425527 -0.337610 -1.0 1.0 \n", + "4 -0.195845 ... -0.270588 0.178295 0.364718 -0.423021 -0.190749 -1.0 1.0 \n", + "\n", + " 21 22 23 \n", + "0 0.452496 -0.492537 -0.780490 \n", + "1 0.256039 -0.552239 -0.799515 \n", + "2 0.420290 -0.432836 -0.719914 \n", + "3 0.481481 -0.343284 -0.750965 \n", + "4 0.336554 -0.492537 -0.700081 \n", + "\n", + "[5 rows x 24 columns]" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "id": "9539c0e6", + "metadata": {}, + "source": [ + "## pandas.DataFrame.set\n", + "\n", + "* Input: readings (dataframe)\n", + "* Output: readings (dataframe with turbine_id)\n", + "* Effect: turbine_id has been set as a readings column" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "d58c17c1", + "metadata": {}, + "outputs": [], + "source": [ + "step = 6\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "b5b62c52", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "8bedb44e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...151617181920212223turbine_id
0-0.080460-0.692308-1.0-1.0-1.0-0.2254340.171598-0.588235-0.600784-0.272028...0.4263570.403772-0.136829-0.380486-1.01.00.452496-0.492537-0.7804901
10.218391-0.538462-1.0-1.0-1.0-0.2023120.416174-0.441176-0.674373-0.177376...0.3333330.416076-0.048256-0.294733-1.01.00.256039-0.552239-0.7995151
2-0.4942530.384615-1.0-1.0-1.0-0.4566470.471400-0.558824-0.656414-0.285110...0.2558140.203691-0.208272-0.258947-1.01.00.420290-0.432836-0.7199141
30.080460-0.076923-1.0-1.0-1.0-0.6184970.644970-0.411765-0.650222-0.666795...0.1472870.283135-0.425527-0.337610-1.01.00.481481-0.343284-0.7509651
4-0.218391-0.384615-1.0-1.0-1.0-0.4450870.380671-0.529412-0.650532-0.195845...0.1782950.364718-0.423021-0.190749-1.01.00.336554-0.492537-0.7000811
\n", + "

5 rows × 25 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 7 8 \\\n", + "0 -0.080460 -0.692308 -1.0 -1.0 -1.0 -0.225434 0.171598 -0.588235 -0.600784 \n", + "1 0.218391 -0.538462 -1.0 -1.0 -1.0 -0.202312 0.416174 -0.441176 -0.674373 \n", + "2 -0.494253 0.384615 -1.0 -1.0 -1.0 -0.456647 0.471400 -0.558824 -0.656414 \n", + "3 0.080460 -0.076923 -1.0 -1.0 -1.0 -0.618497 0.644970 -0.411765 -0.650222 \n", + "4 -0.218391 -0.384615 -1.0 -1.0 -1.0 -0.445087 0.380671 -0.529412 -0.650532 \n", + "\n", + " 9 ... 15 16 17 18 19 20 21 \\\n", + "0 -0.272028 ... 0.426357 0.403772 -0.136829 -0.380486 -1.0 1.0 0.452496 \n", + "1 -0.177376 ... 0.333333 0.416076 -0.048256 -0.294733 -1.0 1.0 0.256039 \n", + "2 -0.285110 ... 0.255814 0.203691 -0.208272 -0.258947 -1.0 1.0 0.420290 \n", + "3 -0.666795 ... 0.147287 0.283135 -0.425527 -0.337610 -1.0 1.0 0.481481 \n", + "4 -0.195845 ... 0.178295 0.364718 -0.423021 -0.190749 -1.0 1.0 0.336554 \n", + "\n", + " 22 23 turbine_id \n", + "0 -0.492537 -0.780490 1 \n", + "1 -0.552239 -0.799515 1 \n", + "2 -0.432836 -0.719914 1 \n", + "3 -0.343284 -0.750965 1 \n", + "4 -0.492537 -0.700081 1 \n", + "\n", + "[5 rows x 25 columns]" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "id": "f2849d45", + "metadata": {}, + "source": [ + "## pandas.DataFrame.set\n", + "\n", + "* Input: readings (dataframe with turbine_id)\n", + "* Output: readings (dataframe with turbine_id and timestamp)\n", + "* Effect: timestamp has been set as a readings column" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "9896ef19", + "metadata": {}, + "outputs": [], + "source": [ + "step = 7\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "384e4e91", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "7dcc2b2c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...1617181920212223turbine_idtimestamp
0-0.080460-0.692308-1.0-1.0-1.0-0.2254340.171598-0.588235-0.600784-0.272028...0.403772-0.136829-0.380486-1.01.00.452496-0.492537-0.78049012013-01-12 00:10:00
10.218391-0.538462-1.0-1.0-1.0-0.2023120.416174-0.441176-0.674373-0.177376...0.416076-0.048256-0.294733-1.01.00.256039-0.552239-0.79951512013-01-12 00:20:00
2-0.4942530.384615-1.0-1.0-1.0-0.4566470.471400-0.558824-0.656414-0.285110...0.203691-0.208272-0.258947-1.01.00.420290-0.432836-0.71991412013-01-12 00:30:00
30.080460-0.076923-1.0-1.0-1.0-0.6184970.644970-0.411765-0.650222-0.666795...0.283135-0.425527-0.337610-1.01.00.481481-0.343284-0.75096512013-01-12 00:40:00
4-0.218391-0.384615-1.0-1.0-1.0-0.4450870.380671-0.529412-0.650532-0.195845...0.364718-0.423021-0.190749-1.01.00.336554-0.492537-0.70008112013-01-12 00:50:00
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 7 8 \\\n", + "0 -0.080460 -0.692308 -1.0 -1.0 -1.0 -0.225434 0.171598 -0.588235 -0.600784 \n", + "1 0.218391 -0.538462 -1.0 -1.0 -1.0 -0.202312 0.416174 -0.441176 -0.674373 \n", + "2 -0.494253 0.384615 -1.0 -1.0 -1.0 -0.456647 0.471400 -0.558824 -0.656414 \n", + "3 0.080460 -0.076923 -1.0 -1.0 -1.0 -0.618497 0.644970 -0.411765 -0.650222 \n", + "4 -0.218391 -0.384615 -1.0 -1.0 -1.0 -0.445087 0.380671 -0.529412 -0.650532 \n", + "\n", + " 9 ... 16 17 18 19 20 21 22 \\\n", + "0 -0.272028 ... 0.403772 -0.136829 -0.380486 -1.0 1.0 0.452496 -0.492537 \n", + "1 -0.177376 ... 0.416076 -0.048256 -0.294733 -1.0 1.0 0.256039 -0.552239 \n", + "2 -0.285110 ... 0.203691 -0.208272 -0.258947 -1.0 1.0 0.420290 -0.432836 \n", + "3 -0.666795 ... 0.283135 -0.425527 -0.337610 -1.0 1.0 0.481481 -0.343284 \n", + "4 -0.195845 ... 0.364718 -0.423021 -0.190749 -1.0 1.0 0.336554 -0.492537 \n", + "\n", + " 23 turbine_id timestamp \n", + "0 -0.780490 1 2013-01-12 00:10:00 \n", + "1 -0.799515 1 2013-01-12 00:20:00 \n", + "2 -0.719914 1 2013-01-12 00:30:00 \n", + "3 -0.750965 1 2013-01-12 00:40:00 \n", + "4 -0.700081 1 2013-01-12 00:50:00 \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "id": "087b270d", + "metadata": {}, + "source": [ + "## mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences\n", + "\n", + "* Input: X, readings (dataframe with turbine_id and timestamp)\n", + "* Output: X\n", + "* Effect: X has been converted to a 3d numpy array that contains 1 matrix of shape\n", + " (window_size x num_signals) for each one of the target times." + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "b4ff2d0a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'window_size': 24, 'cutoff_time': 'cutoff_time', 'time_index': 'timestamp'}" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipeline._pipeline.get_hyperparameters()[\n", + " 'mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1']" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "2c8fd174", + "metadata": {}, + "outputs": [], + "source": [ + "step = 8\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "b051da01", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "a802d22b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(33727, 26)" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "cc53012b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(18131,)" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['y'].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "b1212aaf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + 
"(18131, 24, 24)" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['X'].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "87abb56d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[ 0.2183908 , -0.53846154, -1. , -1. , -1. ,\n", + " -0.20231214, 0.41617357, -0.44117647, -0.674373 , -0.17737591,\n", + " -1. , -0.33333333, -1. , -1. , -0.4 ,\n", + " 0.33333333, 0.41607597, -0.04825569, -0.29473329, -1. ,\n", + " 1. , 0.25603865, -0.55223881, -0.79951539],\n", + " [-0.49425287, 0.38461538, -1. , -1. , -1. ,\n", + " -0.4566474 , 0.47140039, -0.55882353, -0.65641449, -0.28510966,\n", + " -1. , -0.66666667, -1. , -1. , -0.28235294,\n", + " 0.25581395, 0.20369132, -0.2082724 , -0.25894666, -1. ,\n", + " 1. , 0.42028986, -0.43283582, -0.71991385],\n", + " [ 0.08045977, -0.07692308, -1. , -1. , -1. ,\n", + " -0.61849711, 0.64497041, -0.41176471, -0.6502219 , -0.66679492,\n", + " -1. , -0.33333333, -1. , -1. , -0.28235294,\n", + " 0.14728682, 0.28313495, -0.42552747, -0.33760972, -1. ,\n", + " 1. , 0.48148148, -0.34328358, -0.75096473]])" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['X'][0][:3]" + ] + }, + { + "cell_type": "markdown", + "id": "8876f20e", + "metadata": {}, + "source": [ + "## keras.Sequential.LSTMTimeSeriesRegressor\n", + "\n", + "* Input: X, y\n", + "* Output: \n", + "* Effect: LSTM has been fitted." + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "561c3e09", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2022-02-01 10:08:21.044547: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2022-02-01 10:08:21.080727: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f8579596430 initialized for platform Host (this does not guarantee that XLA will be used). 
Devices:\n", + "2022-02-01 10:08:21.080742: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version\n" + ] + } + ], + "source": [ + "step = 9\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/tutorials/pipelines/lstm_with_unstack.ipynb b/tutorials/pipelines/lstm_with_unstack.ipynb new file mode 100644 index 0000000..799b90e --- /dev/null +++ b/tutorials/pipelines/lstm_with_unstack.ipynb @@ -0,0 +1,2249 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# lstm_with_unstack" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "from draco.demo import load_demo\n", + "\n", + "target_times, readings = load_demo()" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_name = 'lstm_with_unstack'" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from draco.pipeline import DracoPipeline\n", + "\n", + "pipeline = DracoPipeline(pipeline_name)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['mlblocks.MLPipeline',\n", + " 'pandas.DataFrame.pop',\n", + " 'pandas.DataFrame.pop',\n", + " 'sklearn.impute.SimpleImputer',\n", + " 'sklearn.preprocessing.MinMaxScaler',\n", + " 'pandas.DataFrame',\n", + " 'pandas.DataFrame.set',\n", + " 'pandas.DataFrame.set',\n", + " 'mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences',\n", + " 'keras.Sequential.LSTMTimeSeriesClassifier']" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipeline.template['primitives']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Step by Step execution" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Input Data" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idsignal_idtimestampvalue
0T001S012013-01-10323.0
1T001S022013-01-10320.0
2T001S032013-01-10284.0
3T001S042013-01-10348.0
4T001S052013-01-10273.0
\n", + "
" + ], + "text/plain": [ + " turbine_id signal_id timestamp value\n", + "0 T001 S01 2013-01-10 323.0\n", + "1 T001 S02 2013-01-10 320.0\n", + "2 T001 S03 2013-01-10 284.0\n", + "3 T001 S04 2013-01-10 348.0\n", + "4 T001 S05 2013-01-10 273.0" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "readings.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idcutoff_timetarget
0T0012013-01-120
1T0012013-01-130
2T0012013-01-140
3T0012013-01-151
4T0012013-01-160
\n", + "
" + ], + "text/plain": [ + " turbine_id cutoff_time target\n", + "0 T001 2013-01-12 0\n", + "1 T001 2013-01-13 0\n", + "2 T001 2013-01-14 0\n", + "3 T001 2013-01-15 1\n", + "4 T001 2013-01-16 0" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "target_times.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Data Preparation (part of Draco Pipeline)\n", + "\n", + "* Input: target_times, readings, turbines\n", + "* Output: X, y, readings, turbines\n", + "* Effect: target_times has been split into X and y" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## mlblocks.MLPipeline\n", + "\n", + "### pandas.DataFrame.resample\n", + "\n", + "* Input: readings\n", + "* Output: readings (resampled)\n", + "* Effect: readings have been resampled to the indicated resample rule and turbine_id,\n", + " signal_id and timestamp have been set as a multi-index\n", + " \n", + "### pandas.DataFrame.unstack\n", + "\n", + "* Input: readings (resampled)\n", + "* Output: readings (unstacked)\n", + "* Effect: readings have been unstacked" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "context = pipeline.fit(target_times, readings, output_=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'X', 'y'])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
turbine_idtimestampvalue_S01value_S02value_S03value_S04value_S05value_S06value_S07value_S08...value_S17value_S18value_S19value_S20value_S21value_S22value_S23value_S24value_S25value_S26
0T0012013-01-10 00:00:00323.0320.0284.0348.0273.0342.0280.03197842.0...11.73131020.055.055.047.058.045.058.047.0356.0
1T0012013-01-10 00:10:00346.0384.0367.0411.0331.0360.0249.03197900.0...10.23131420.058.063.062.067.055.061.042.0400.0
2T0012013-01-10 00:20:00407.0363.0407.0393.0275.0335.0270.03197968.0...9.53131822.068.061.067.066.046.055.045.0402.0
3T0012013-01-10 00:30:00257.0307.0315.0361.0317.0354.0271.03198011.0...10.53132179.043.051.053.062.053.060.045.0357.0
4T0012013-01-10 00:40:00267.0309.0314.0355.0262.0246.0212.03198056.0...9.63132501.045.051.054.059.043.041.036.0322.0
\n", + "

5 rows × 28 columns

\n", + "
" + ], + "text/plain": [ + " turbine_id timestamp value_S01 value_S02 value_S03 value_S04 \\\n", + "0 T001 2013-01-10 00:00:00 323.0 320.0 284.0 348.0 \n", + "1 T001 2013-01-10 00:10:00 346.0 384.0 367.0 411.0 \n", + "2 T001 2013-01-10 00:20:00 407.0 363.0 407.0 393.0 \n", + "3 T001 2013-01-10 00:30:00 257.0 307.0 315.0 361.0 \n", + "4 T001 2013-01-10 00:40:00 267.0 309.0 314.0 355.0 \n", + "\n", + " value_S05 value_S06 value_S07 value_S08 ... value_S17 value_S18 \\\n", + "0 273.0 342.0 280.0 3197842.0 ... 11.7 3131020.0 \n", + "1 331.0 360.0 249.0 3197900.0 ... 10.2 3131420.0 \n", + "2 275.0 335.0 270.0 3197968.0 ... 9.5 3131822.0 \n", + "3 317.0 354.0 271.0 3198011.0 ... 10.5 3132179.0 \n", + "4 262.0 246.0 212.0 3198056.0 ... 9.6 3132501.0 \n", + "\n", + " value_S19 value_S20 value_S21 value_S22 value_S23 value_S24 \\\n", + "0 55.0 55.0 47.0 58.0 45.0 58.0 \n", + "1 58.0 63.0 62.0 67.0 55.0 61.0 \n", + "2 68.0 61.0 67.0 66.0 46.0 55.0 \n", + "3 43.0 51.0 53.0 62.0 53.0 60.0 \n", + "4 45.0 51.0 54.0 59.0 43.0 41.0 \n", + "\n", + " value_S25 value_S26 \n", + "0 47.0 356.0 \n", + "1 42.0 400.0 \n", + "2 45.0 402.0 \n", + "3 45.0 357.0 \n", + "4 36.0 322.0 \n", + "\n", + "[5 rows x 28 columns]" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## pandas.DataFrame.pop\n", + "\n", + "* Input: readings (unstacked)\n", + "* Output: readings (without turbine_id), turbine_id\n", + "* Effect: turbine_id has been popped from readings" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "step = 1\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'X', 'y', 'turbine_id'])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 T001\n", + "1 T001\n", + "2 T001\n", + "3 T001\n", + "4 T001\n", + "Name: turbine_id, dtype: object" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['turbine_id'].head()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
timestampvalue_S01value_S02value_S03value_S04value_S05value_S06value_S07value_S08value_S09...value_S17value_S18value_S19value_S20value_S21value_S22value_S23value_S24value_S25value_S26
02013-01-10 00:00:00323.0320.0284.0348.0273.0342.0280.03197842.0695000.0...11.73131020.055.055.047.058.045.058.047.0356.0
12013-01-10 00:10:00346.0384.0367.0411.0331.0360.0249.03197900.0695063.0...10.23131420.058.063.062.067.055.061.042.0400.0
22013-01-10 00:20:00407.0363.0407.0393.0275.0335.0270.03197968.0695124.0...9.53131822.068.061.067.066.046.055.045.0402.0
32013-01-10 00:30:00257.0307.0315.0361.0317.0354.0271.03198011.0695175.0...10.53132179.043.051.053.062.053.060.045.0357.0
42013-01-10 00:40:00267.0309.0314.0355.0262.0246.0212.03198056.0695226.0...9.63132501.045.051.054.059.043.041.036.0322.0
\n", + "

5 rows × 27 columns

\n", + "
" + ], + "text/plain": [ + " timestamp value_S01 value_S02 value_S03 value_S04 value_S05 \\\n", + "0 2013-01-10 00:00:00 323.0 320.0 284.0 348.0 273.0 \n", + "1 2013-01-10 00:10:00 346.0 384.0 367.0 411.0 331.0 \n", + "2 2013-01-10 00:20:00 407.0 363.0 407.0 393.0 275.0 \n", + "3 2013-01-10 00:30:00 257.0 307.0 315.0 361.0 317.0 \n", + "4 2013-01-10 00:40:00 267.0 309.0 314.0 355.0 262.0 \n", + "\n", + " value_S06 value_S07 value_S08 value_S09 ... value_S17 value_S18 \\\n", + "0 342.0 280.0 3197842.0 695000.0 ... 11.7 3131020.0 \n", + "1 360.0 249.0 3197900.0 695063.0 ... 10.2 3131420.0 \n", + "2 335.0 270.0 3197968.0 695124.0 ... 9.5 3131822.0 \n", + "3 354.0 271.0 3198011.0 695175.0 ... 10.5 3132179.0 \n", + "4 246.0 212.0 3198056.0 695226.0 ... 9.6 3132501.0 \n", + "\n", + " value_S19 value_S20 value_S21 value_S22 value_S23 value_S24 \\\n", + "0 55.0 55.0 47.0 58.0 45.0 58.0 \n", + "1 58.0 63.0 62.0 67.0 55.0 61.0 \n", + "2 68.0 61.0 67.0 66.0 46.0 55.0 \n", + "3 43.0 51.0 53.0 62.0 53.0 60.0 \n", + "4 45.0 51.0 54.0 59.0 43.0 41.0 \n", + "\n", + " value_S25 value_S26 \n", + "0 47.0 356.0 \n", + "1 42.0 400.0 \n", + "2 45.0 402.0 \n", + "3 45.0 357.0 \n", + "4 36.0 322.0 \n", + "\n", + "[5 rows x 27 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## pandas.DataFrame.pop\n", + "\n", + "* Input: readings (without turbine_id)\n", + "* Output: readings (without timestamp), timestamp\n", + "* Effect: timestamp has been popped from readings" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "step = 2\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'X', 'y', 'timestamp'])" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2013-01-10 00:00:00\n", + "1 2013-01-10 00:10:00\n", + "2 2013-01-10 00:20:00\n", + "3 2013-01-10 00:30:00\n", + "4 2013-01-10 00:40:00\n", + "Name: timestamp, dtype: datetime64[ns]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['timestamp'].head()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
value_S01value_S02value_S03value_S04value_S05value_S06value_S07value_S08value_S09value_S10...value_S17value_S18value_S19value_S20value_S21value_S22value_S23value_S24value_S25value_S26
0323.0320.0284.0348.0273.0342.0280.03197842.0695000.03348234.0...11.73131020.055.055.047.058.045.058.047.0356.0
1346.0384.0367.0411.0331.0360.0249.03197900.0695063.03348296.0...10.23131420.058.063.062.067.055.061.042.0400.0
2407.0363.0407.0393.0275.0335.0270.03197968.0695124.03348363.0...9.53131822.068.061.067.066.046.055.045.0402.0
3257.0307.0315.0361.0317.0354.0271.03198011.0695175.03348416.0...10.53132179.043.051.053.062.053.060.045.0357.0
4267.0309.0314.0355.0262.0246.0212.03198056.0695226.03348470.0...9.63132501.045.051.054.059.043.041.036.0322.0
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " value_S01 value_S02 value_S03 value_S04 value_S05 value_S06 \\\n", + "0 323.0 320.0 284.0 348.0 273.0 342.0 \n", + "1 346.0 384.0 367.0 411.0 331.0 360.0 \n", + "2 407.0 363.0 407.0 393.0 275.0 335.0 \n", + "3 257.0 307.0 315.0 361.0 317.0 354.0 \n", + "4 267.0 309.0 314.0 355.0 262.0 246.0 \n", + "\n", + " value_S07 value_S08 value_S09 value_S10 ... value_S17 value_S18 \\\n", + "0 280.0 3197842.0 695000.0 3348234.0 ... 11.7 3131020.0 \n", + "1 249.0 3197900.0 695063.0 3348296.0 ... 10.2 3131420.0 \n", + "2 270.0 3197968.0 695124.0 3348363.0 ... 9.5 3131822.0 \n", + "3 271.0 3198011.0 695175.0 3348416.0 ... 10.5 3132179.0 \n", + "4 212.0 3198056.0 695226.0 3348470.0 ... 9.6 3132501.0 \n", + "\n", + " value_S19 value_S20 value_S21 value_S22 value_S23 value_S24 \\\n", + "0 55.0 55.0 47.0 58.0 45.0 58.0 \n", + "1 58.0 63.0 62.0 67.0 55.0 61.0 \n", + "2 68.0 61.0 67.0 66.0 46.0 55.0 \n", + "3 43.0 51.0 53.0 62.0 53.0 60.0 \n", + "4 45.0 51.0 54.0 59.0 43.0 41.0 \n", + "\n", + " value_S25 value_S26 \n", + "0 47.0 356.0 \n", + "1 42.0 400.0 \n", + "2 45.0 402.0 \n", + "3 45.0 357.0 \n", + "4 36.0 322.0 \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## sklearn.impute.SimpleImputer\n", + "\n", + "* Input: readings (unstacked, no turbine_id, no timestamp)\n", + "* Output: readings (imputed, numpy array)\n", + "* Effect: readings have been imputed and converted to numpy array" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [], + "source": [ + "step = 3\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[3.230000e+02, 3.200000e+02, 2.840000e+02, 3.480000e+02,\n", + " 2.730000e+02, 3.420000e+02, 2.800000e+02, 3.197842e+06,\n", + " 6.950000e+05, 3.348234e+06, 3.436762e+06, 3.322362e+06,\n", + " 3.357952e+06, 3.223797e+06, 8.300000e+00, 6.000000e+00,\n", + " 1.170000e+01, 3.131020e+06, 5.500000e+01, 5.500000e+01,\n", + " 4.700000e+01, 5.800000e+01, 4.500000e+01, 5.800000e+01,\n", + " 4.700000e+01, 3.560000e+02],\n", + " [3.460000e+02, 3.840000e+02, 3.670000e+02, 4.110000e+02,\n", + " 3.310000e+02, 3.600000e+02, 2.490000e+02, 3.197900e+06,\n", + " 6.950630e+05, 3.348296e+06, 3.436829e+06, 3.322417e+06,\n", + " 3.358013e+06, 3.223839e+06, 7.600000e+00, 5.000000e+00,\n", + " 1.020000e+01, 3.131420e+06, 5.800000e+01, 6.300000e+01,\n", + " 6.200000e+01, 6.700000e+01, 5.500000e+01, 6.100000e+01,\n", + " 4.200000e+01, 4.000000e+02],\n", + " [4.070000e+02, 3.630000e+02, 4.070000e+02, 3.930000e+02,\n", + " 2.750000e+02, 3.350000e+02, 2.700000e+02, 3.197968e+06,\n", + " 6.951240e+05, 3.348363e+06, 3.436895e+06, 3.322463e+06,\n", + " 3.358068e+06, 3.223884e+06, 7.800000e+00, 5.700000e+00,\n", + " 9.500000e+00, 3.131822e+06, 6.800000e+01, 6.100000e+01,\n", + " 6.700000e+01, 6.600000e+01, 4.600000e+01, 5.500000e+01,\n", + " 4.500000e+01, 4.020000e+02],\n", + " 
[2.570000e+02, 3.070000e+02, 3.150000e+02, 3.610000e+02,\n", + " 3.170000e+02, 3.540000e+02, 2.710000e+02, 3.198011e+06,\n", + " 6.951750e+05, 3.348416e+06, 3.436957e+06, 3.322516e+06,\n", + " 3.358128e+06, 3.223929e+06, 8.600000e+00, 6.600000e+00,\n", + " 1.050000e+01, 3.132179e+06, 4.300000e+01, 5.100000e+01,\n", + " 5.300000e+01, 6.200000e+01, 5.300000e+01, 6.000000e+01,\n", + " 4.500000e+01, 3.570000e+02],\n", + " [2.670000e+02, 3.090000e+02, 3.140000e+02, 3.550000e+02,\n", + " 2.620000e+02, 2.460000e+02, 2.120000e+02, 3.198056e+06,\n", + " 6.952260e+05, 3.348470e+06, 3.437016e+06, 3.322559e+06,\n", + " 3.358169e+06, 3.223965e+06, 7.500000e+00, 5.900000e+00,\n", + " 9.600000e+00, 3.132501e+06, 4.500000e+01, 5.100000e+01,\n", + " 5.400000e+01, 5.900000e+01, 4.300000e+01, 4.100000e+01,\n", + " 3.600000e+01, 3.220000e+02]])" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'][0:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## sklearn.preprocessing.MinMaxScaler\n", + "\n", + "* Input: readings (imputed, array)\n", + "* Output: readings (scaled, array)\n", + "* Effect: readings have been scaled to [-1, 1] range" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [], + "source": [ + "step = 4\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[-0.23563892, -0.24267292, -0.3286385 , -0.17702227, -0.35287222,\n", + " -0.19248826, -0.3317757 , -1. , -1. , -1. ,\n", + " -1. , -1. , -1. , -1. 
, -0.11702128,\n", + " -0.24050633, -0.25714286, -0.37378787, -0.22758621, -0.22758621,\n", + " -0.31972789, -0.1862069 , -0.36986301, -0.1862069 , -0.33793103,\n", + " -0.26141079],\n", + " [-0.18171161, -0.0926143 , -0.13380282, -0.02930832, -0.21688159,\n", + " -0.15023474, -0.40420561, -0.99995911, -0.99995779, -0.99995941,\n", + " -0.99995718, -0.99996326, -0.99996042, -0.99997164, -0.19148936,\n", + " -0.36708861, -0.35238095, -0.37370786, -0.1862069 , -0.11724138,\n", + " -0.11564626, -0.06206897, -0.23287671, -0.14482759, -0.40689655,\n", + " -0.17012448],\n", + " [-0.03868699, -0.14185229, -0.0399061 , -0.07151231, -0.34818288,\n", + " -0.20892019, -0.35514019, -0.99991116, -0.99991693, -0.99991555,\n", + " -0.999915 , -0.99993254, -0.99992474, -0.99994125, -0.17021277,\n", + " -0.27848101, -0.3968254 , -0.37362746, -0.04827586, -0.14482759,\n", + " -0.04761905, -0.07586207, -0.35616438, -0.22758621, -0.36551724,\n", + " -0.1659751 ],\n", + " [-0.39038687, -0.27315358, -0.25586854, -0.14654162, -0.24970692,\n", + " -0.16431925, -0.35280374, -0.99988085, -0.99988276, -0.99988086,\n", + " -0.99987538, -0.99989714, -0.99988581, -0.99991086, -0.08510638,\n", + " -0.16455696, -0.33333333, -0.37355606, -0.39310345, -0.28275862,\n", + " -0.23809524, -0.13103448, -0.26027397, -0.15862069, -0.36551724,\n", + " -0.2593361 ],\n", + " [-0.36694021, -0.26846424, -0.25821596, -0.16060961, -0.37866354,\n", + " -0.41784038, -0.49065421, -0.99984912, -0.99984859, -0.99984551,\n", + " -0.99983767, -0.99986841, -0.99985921, -0.99988655, -0.20212766,\n", + " -0.25316456, -0.39047619, -0.37349166, -0.36551724, -0.28275862,\n", + " -0.2244898 , -0.17241379, -0.39726027, -0.42068966, -0.48965517,\n", + " -0.33195021]])" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'][0:5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## pandas.DataFrame\n", + "\n", + "* Input: readings (scaled, array)\n", + "* Output: readings (dataframe)\n", + "* Effect: readings have been converted into a dataframe" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "step = 5\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...16171819202122232425
0-0.235639-0.242673-0.328638-0.177022-0.352872-0.192488-0.331776-1.000000-1.000000-1.000000...-0.257143-0.373788-0.227586-0.227586-0.319728-0.186207-0.369863-0.186207-0.337931-0.261411
1-0.181712-0.092614-0.133803-0.029308-0.216882-0.150235-0.404206-0.999959-0.999958-0.999959...-0.352381-0.373708-0.186207-0.117241-0.115646-0.062069-0.232877-0.144828-0.406897-0.170124
2-0.038687-0.141852-0.039906-0.071512-0.348183-0.208920-0.355140-0.999911-0.999917-0.999916...-0.396825-0.373627-0.048276-0.144828-0.047619-0.075862-0.356164-0.227586-0.365517-0.165975
3-0.390387-0.273154-0.255869-0.146542-0.249707-0.164319-0.352804-0.999881-0.999883-0.999881...-0.333333-0.373556-0.393103-0.282759-0.238095-0.131034-0.260274-0.158621-0.365517-0.259336
4-0.366940-0.268464-0.258216-0.160610-0.378664-0.417840-0.490654-0.999849-0.999849-0.999846...-0.390476-0.373492-0.365517-0.282759-0.224490-0.172414-0.397260-0.420690-0.489655-0.331950
\n", + "

5 rows × 26 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 \\\n", + "0 -0.235639 -0.242673 -0.328638 -0.177022 -0.352872 -0.192488 -0.331776 \n", + "1 -0.181712 -0.092614 -0.133803 -0.029308 -0.216882 -0.150235 -0.404206 \n", + "2 -0.038687 -0.141852 -0.039906 -0.071512 -0.348183 -0.208920 -0.355140 \n", + "3 -0.390387 -0.273154 -0.255869 -0.146542 -0.249707 -0.164319 -0.352804 \n", + "4 -0.366940 -0.268464 -0.258216 -0.160610 -0.378664 -0.417840 -0.490654 \n", + "\n", + " 7 8 9 ... 16 17 18 19 \\\n", + "0 -1.000000 -1.000000 -1.000000 ... -0.257143 -0.373788 -0.227586 -0.227586 \n", + "1 -0.999959 -0.999958 -0.999959 ... -0.352381 -0.373708 -0.186207 -0.117241 \n", + "2 -0.999911 -0.999917 -0.999916 ... -0.396825 -0.373627 -0.048276 -0.144828 \n", + "3 -0.999881 -0.999883 -0.999881 ... -0.333333 -0.373556 -0.393103 -0.282759 \n", + "4 -0.999849 -0.999849 -0.999846 ... -0.390476 -0.373492 -0.365517 -0.282759 \n", + "\n", + " 20 21 22 23 24 25 \n", + "0 -0.319728 -0.186207 -0.369863 -0.186207 -0.337931 -0.261411 \n", + "1 -0.115646 -0.062069 -0.232877 -0.144828 -0.406897 -0.170124 \n", + "2 -0.047619 -0.075862 -0.356164 -0.227586 -0.365517 -0.165975 \n", + "3 -0.238095 -0.131034 -0.260274 -0.158621 -0.365517 -0.259336 \n", + "4 -0.224490 -0.172414 -0.397260 -0.420690 -0.489655 -0.331950 \n", + "\n", + "[5 rows x 26 columns]" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## pandas.DataFrame.set\n", + "\n", + "* Input: readings (dataframe)\n", + "* Output: readings (dataframe with turbine_id)\n", + "* Effect: turbine_id has been set as a readings column" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [], + "source": [ + "step = 6\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...171819202122232425turbine_id
0-0.235639-0.242673-0.328638-0.177022-0.352872-0.192488-0.331776-1.000000-1.000000-1.000000...-0.373788-0.227586-0.227586-0.319728-0.186207-0.369863-0.186207-0.337931-0.261411T001
1-0.181712-0.092614-0.133803-0.029308-0.216882-0.150235-0.404206-0.999959-0.999958-0.999959...-0.373708-0.186207-0.117241-0.115646-0.062069-0.232877-0.144828-0.406897-0.170124T001
2-0.038687-0.141852-0.039906-0.071512-0.348183-0.208920-0.355140-0.999911-0.999917-0.999916...-0.373627-0.048276-0.144828-0.047619-0.075862-0.356164-0.227586-0.365517-0.165975T001
3-0.390387-0.273154-0.255869-0.146542-0.249707-0.164319-0.352804-0.999881-0.999883-0.999881...-0.373556-0.393103-0.282759-0.238095-0.131034-0.260274-0.158621-0.365517-0.259336T001
4-0.366940-0.268464-0.258216-0.160610-0.378664-0.417840-0.490654-0.999849-0.999849-0.999846...-0.373492-0.365517-0.282759-0.224490-0.172414-0.397260-0.420690-0.489655-0.331950T001
\n", + "

5 rows × 27 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 \\\n", + "0 -0.235639 -0.242673 -0.328638 -0.177022 -0.352872 -0.192488 -0.331776 \n", + "1 -0.181712 -0.092614 -0.133803 -0.029308 -0.216882 -0.150235 -0.404206 \n", + "2 -0.038687 -0.141852 -0.039906 -0.071512 -0.348183 -0.208920 -0.355140 \n", + "3 -0.390387 -0.273154 -0.255869 -0.146542 -0.249707 -0.164319 -0.352804 \n", + "4 -0.366940 -0.268464 -0.258216 -0.160610 -0.378664 -0.417840 -0.490654 \n", + "\n", + " 7 8 9 ... 17 18 19 20 \\\n", + "0 -1.000000 -1.000000 -1.000000 ... -0.373788 -0.227586 -0.227586 -0.319728 \n", + "1 -0.999959 -0.999958 -0.999959 ... -0.373708 -0.186207 -0.117241 -0.115646 \n", + "2 -0.999911 -0.999917 -0.999916 ... -0.373627 -0.048276 -0.144828 -0.047619 \n", + "3 -0.999881 -0.999883 -0.999881 ... -0.373556 -0.393103 -0.282759 -0.238095 \n", + "4 -0.999849 -0.999849 -0.999846 ... -0.373492 -0.365517 -0.282759 -0.224490 \n", + "\n", + " 21 22 23 24 25 turbine_id \n", + "0 -0.186207 -0.369863 -0.186207 -0.337931 -0.261411 T001 \n", + "1 -0.062069 -0.232877 -0.144828 -0.406897 -0.170124 T001 \n", + "2 -0.075862 -0.356164 -0.227586 -0.365517 -0.165975 T001 \n", + "3 -0.131034 -0.260274 -0.158621 -0.365517 -0.259336 T001 \n", + "4 -0.172414 -0.397260 -0.420690 -0.489655 -0.331950 T001 \n", + "\n", + "[5 rows x 27 columns]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## pandas.DataFrame.set\n", + "\n", + "* Input: readings (dataframe with turbine_id)\n", + "* Output: readings (dataframe with turbine_id and timestamp)\n", + "* Effect: timestamp has been set as a readings column" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [], + "source": [ + "step = 7\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0123456789...1819202122232425turbine_idtimestamp
0-0.235639-0.242673-0.328638-0.177022-0.352872-0.192488-0.331776-1.000000-1.000000-1.000000...-0.227586-0.227586-0.319728-0.186207-0.369863-0.186207-0.337931-0.261411T0012013-01-10 00:00:00
1-0.181712-0.092614-0.133803-0.029308-0.216882-0.150235-0.404206-0.999959-0.999958-0.999959...-0.186207-0.117241-0.115646-0.062069-0.232877-0.144828-0.406897-0.170124T0012013-01-10 00:10:00
2-0.038687-0.141852-0.039906-0.071512-0.348183-0.208920-0.355140-0.999911-0.999917-0.999916...-0.048276-0.144828-0.047619-0.075862-0.356164-0.227586-0.365517-0.165975T0012013-01-10 00:20:00
3-0.390387-0.273154-0.255869-0.146542-0.249707-0.164319-0.352804-0.999881-0.999883-0.999881...-0.393103-0.282759-0.238095-0.131034-0.260274-0.158621-0.365517-0.259336T0012013-01-10 00:30:00
4-0.366940-0.268464-0.258216-0.160610-0.378664-0.417840-0.490654-0.999849-0.999849-0.999846...-0.365517-0.282759-0.224490-0.172414-0.397260-0.420690-0.489655-0.331950T0012013-01-10 00:40:00
\n", + "

5 rows × 28 columns

\n", + "
" + ], + "text/plain": [ + " 0 1 2 3 4 5 6 \\\n", + "0 -0.235639 -0.242673 -0.328638 -0.177022 -0.352872 -0.192488 -0.331776 \n", + "1 -0.181712 -0.092614 -0.133803 -0.029308 -0.216882 -0.150235 -0.404206 \n", + "2 -0.038687 -0.141852 -0.039906 -0.071512 -0.348183 -0.208920 -0.355140 \n", + "3 -0.390387 -0.273154 -0.255869 -0.146542 -0.249707 -0.164319 -0.352804 \n", + "4 -0.366940 -0.268464 -0.258216 -0.160610 -0.378664 -0.417840 -0.490654 \n", + "\n", + " 7 8 9 ... 18 19 20 21 \\\n", + "0 -1.000000 -1.000000 -1.000000 ... -0.227586 -0.227586 -0.319728 -0.186207 \n", + "1 -0.999959 -0.999958 -0.999959 ... -0.186207 -0.117241 -0.115646 -0.062069 \n", + "2 -0.999911 -0.999917 -0.999916 ... -0.048276 -0.144828 -0.047619 -0.075862 \n", + "3 -0.999881 -0.999883 -0.999881 ... -0.393103 -0.282759 -0.238095 -0.131034 \n", + "4 -0.999849 -0.999849 -0.999846 ... -0.365517 -0.282759 -0.224490 -0.172414 \n", + "\n", + " 22 23 24 25 turbine_id timestamp \n", + "0 -0.369863 -0.186207 -0.337931 -0.261411 T001 2013-01-10 00:00:00 \n", + "1 -0.232877 -0.144828 -0.406897 -0.170124 T001 2013-01-10 00:10:00 \n", + "2 -0.356164 -0.227586 -0.365517 -0.165975 T001 2013-01-10 00:20:00 \n", + "3 -0.260274 -0.158621 -0.365517 -0.259336 T001 2013-01-10 00:30:00 \n", + "4 -0.397260 -0.420690 -0.489655 -0.331950 T001 2013-01-10 00:40:00 \n", + "\n", + "[5 rows x 28 columns]" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences\n", + "\n", + "* Input: X, readings (dataframe with turbine_id and timestamp)\n", + "* Output: X\n", + "* Effect: X has been converted to a 3d numpy array that contains 1 matrix of shape\n", + " (window_size x num_signals) for each one of the target times." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'window_size': 24, 'cutoff_time': 'cutoff_time', 'time_index': 'timestamp'}" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pipeline._pipeline.get_hyperparameters()[\n", + " 'mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1']" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [], + "source": [ + "step = 8\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(51121, 28)" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(353,)" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['y'].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(353, 24, 26)" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['X'].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/plain": [ + "array([[-0.66002345, -0.57327081, -0.64084507, -0.57796014, -0.6014068 ,\n", + " -0.56103286, -0.55140187, -0.9928135 , -0.99291267, -0.99315058,\n", + " -0.99304288, -0.99346346, -0.99352632, -0.99395333, -0.42553191,\n", + " -0.41772152, -0.58730159, -0.35996294, -0.66896552, -0.57241379,\n", + " -0.61904762, -0.5862069 , -0.60273973, -0.55862069, -0.55862069,\n", + " -0.59751037],\n", + " [-0.2989449 , -0.38569754, -0.48591549, -0.47713951, -0.66705744,\n", + " -0.5915493 , -0.77336449, -0.99278389, -0.9928852 , -0.99312701,\n", + " -0.99301988, -0.9934481 , -0.9935075 , -0.9939459 , -0.39361702,\n", + " -0.40506329, -0.54285714, -0.35992014, -0.40689655, -0.42068966,\n", + " -0.46938776, -0.48965517, -0.67123288, -0.5862069 , -0.83448276,\n", + " -0.5560166 ],\n", + " [-0.33645955, -0.40679953, -0.39906103, -0.38569754, -0.56154748,\n", + " -0.43192488, -0.45560748, -0.99275498, -0.9928584 , -0.99310017,\n", + " -0.99299431, -0.99342739, -0.99348349, -0.99392294, -0.29787234,\n", + " -0.3164557 , -0.49206349, -0.35986854, -0.42068966, -0.43448276,\n", + " -0.40136054, -0.43448276, -0.56164384, -0.47586207, -0.51724138,\n", + " -0.46473029]])" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['X'][0][:3]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## keras.Sequential.LSTMTimeSeriesClassifier\n", + "\n", + "* Input: X, y\n", + "* Output: \n", + "* Effect: LSTM has been fitted." 
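The primitive assembles and fits its network internally from hyperparameters, and the exact architecture is not shown in this notebook. As a rough, hand-rolled approximation, a minimal equivalent might look like the sketch below; the layer size and dropout rate are illustrative assumptions, not the primitive's defaults.

```python
# Minimal stand-in for keras.Sequential.LSTMTimeSeriesClassifier: one LSTM
# layer over the (window_size, n_signals) sequences, followed by a sigmoid
# unit for the binary target. Sizes here are assumptions for illustration.
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

window_size, n_signals = 24, 26

model = Sequential([
    LSTM(80, input_shape=(window_size, n_signals)),
    Dropout(0.3),
    Dense(1, activation='sigmoid'),
])
model.compile(optimizer='adam', loss='binary_crossentropy',
              metrics=['accuracy'])

# Random stand-in data with the same shapes as the context built above:
# X -> (353, 24, 26), y -> (353,).
X = np.random.random((353, window_size, n_signals)).astype('float32')
y = np.random.randint(0, 2, size=353)
model.fit(X, y, epochs=1, batch_size=32, verbose=0)
```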
+ ] + }, + { + "cell_type": "code", + "execution_count": 40, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2022-01-18 07:34:41.001707: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", + "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", + "2022-01-18 07:34:41.024991: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fac7ea34260 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n", + "2022-01-18 07:34:41.025038: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version\n" + ] + } + ], + "source": [ + "step = 9\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.11" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tutorials/pipelines/unstack_double_lstm_timeseries_classifier.ipynb b/tutorials/pipelines/unstack_double_lstm_timeseries_classifier.ipynb deleted file mode 100644 index f539e89..0000000 --- a/tutorials/pipelines/unstack_double_lstm_timeseries_classifier.ipynb +++ /dev/null @@ -1,2481 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# unstack_double_lstm_timeseries_classifier" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "from draco.demo import load_demo\n", - "\n", - "target_times, readings = load_demo()" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "pipeline_name = 'classes.unstack_double_lstm_timeseries_classifier'" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "from draco.pipeline import DracoPipeline\n", - "\n", - "pipeline = DracoPipeline(pipeline_name)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['pandas.DataFrame.resample',\n", - " 'pandas.DataFrame.unstack',\n", - " 'pandas.DataFrame.pop',\n", - " 'pandas.DataFrame.pop',\n", - " 'sklearn.impute.SimpleImputer',\n", - " 'sklearn.preprocessing.MinMaxScaler',\n", - " 'pandas.DataFrame',\n", - " 'pandas.DataFrame.set',\n", - " 'pandas.DataFrame.set',\n", - " 'mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences',\n", - " 'keras.Sequential.DoubleLSTMTimeSeriesClassifier']" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pipeline.template['primitives']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Step by Step execution" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Input Data" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
turbine_idsignal_idtimestampvalue
0T001S012013-01-10323.0
1T001S022013-01-10320.0
2T001S032013-01-10284.0
3T001S042013-01-10348.0
4T001S052013-01-10273.0
\n", - "
" - ], - "text/plain": [ - " turbine_id signal_id timestamp value\n", - "0 T001 S01 2013-01-10 323.0\n", - "1 T001 S02 2013-01-10 320.0\n", - "2 T001 S03 2013-01-10 284.0\n", - "3 T001 S04 2013-01-10 348.0\n", - "4 T001 S05 2013-01-10 273.0" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "readings.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
turbine_idcutoff_timetarget
0T0012013-01-120
1T0012013-01-130
2T0012013-01-140
3T0012013-01-151
4T0012013-01-160
\n", - "
" - ], - "text/plain": [ - " turbine_id cutoff_time target\n", - "0 T001 2013-01-12 0\n", - "1 T001 2013-01-13 0\n", - "2 T001 2013-01-14 0\n", - "3 T001 2013-01-15 1\n", - "4 T001 2013-01-16 0" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "target_times.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Data Preparation (part of Draco Pipeline)\n", - "\n", - "* Input: target_times, readings, turbines\n", - "* Output: X, y, readings, turbines\n", - "* Effect: target_times has been split into X and y" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## pandas.DataFrame.resample\n", - "\n", - "* Input: readings\n", - "* Output: readings (resampled)\n", - "* Effect: readings have been resampled to the indicated resample rule and turbine_id,\n", - " signal_id and timestamp have been set as a multi-index" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "context = pipeline.fit(target_times, readings, output_=0)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'X', 'y'])" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
value
turbine_idsignal_idtimestamp
T001S012013-01-10 00:00:00313.333333
2013-01-10 01:00:00197.500000
2013-01-10 02:00:00248.166667
2013-01-10 03:00:00253.166667
2013-01-10 04:00:00305.000000
\n", - "
" - ], - "text/plain": [ - " value\n", - "turbine_id signal_id timestamp \n", - "T001 S01 2013-01-10 00:00:00 313.333333\n", - " 2013-01-10 01:00:00 197.500000\n", - " 2013-01-10 02:00:00 248.166667\n", - " 2013-01-10 03:00:00 253.166667\n", - " 2013-01-10 04:00:00 305.000000" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['readings'].head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## pandas.DataFrame.unstack\n", - "\n", - "* Input: readings (resampled)\n", - "* Output: readings (unstacked)\n", - "* Effect: readings have been unstacked" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "step = 1\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'X', 'y'])" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
turbine_idtimestampvalue_S01value_S02value_S03value_S04value_S05value_S06value_S07value_S08...value_S17value_S18value_S19value_S20value_S21value_S22value_S23value_S24value_S25value_S26
0T0012013-01-10 00:00:00313.333333323.833333336.000000364.666667286.500000314.000000243.1666673.197980e+06...10.3833333.131958e+0652.66666754.33333356.16666761.00000047.66666752.66666740.833333357.333333
1T0012013-01-10 01:00:00197.500000221.333333216.000000260.666667206.833333235.833333186.6666673.198221e+06...8.6666673.133668e+0633.16666737.00000036.16666743.66666734.50000039.33333331.166667249.666667
2T0012013-01-10 02:00:00248.166667271.666667277.500000298.000000233.666667271.166667216.3333333.198448e+06...8.8333333.135413e+0641.50000045.66666746.50000049.66666739.33333345.50000036.166667297.666667
3T0012013-01-10 03:00:00253.166667256.166667242.666667265.333333211.666667226.666667181.0000003.198691e+06...8.4333333.137001e+0642.33333342.83333340.50000044.16666735.33333337.83333330.333333268.000000
4T0012013-01-10 04:00:00305.000000312.333333346.166667329.833333280.666667308.833333271.8333333.198978e+06...9.0833333.138843e+0650.50000051.16666755.50000053.66666746.16666749.66666741.166667341.833333
\n", - "

5 rows × 28 columns

\n", - "
" - ], - "text/plain": [ - " turbine_id timestamp value_S01 value_S02 value_S03 \\\n", - "0 T001 2013-01-10 00:00:00 313.333333 323.833333 336.000000 \n", - "1 T001 2013-01-10 01:00:00 197.500000 221.333333 216.000000 \n", - "2 T001 2013-01-10 02:00:00 248.166667 271.666667 277.500000 \n", - "3 T001 2013-01-10 03:00:00 253.166667 256.166667 242.666667 \n", - "4 T001 2013-01-10 04:00:00 305.000000 312.333333 346.166667 \n", - "\n", - " value_S04 value_S05 value_S06 value_S07 value_S08 ... \\\n", - "0 364.666667 286.500000 314.000000 243.166667 3.197980e+06 ... \n", - "1 260.666667 206.833333 235.833333 186.666667 3.198221e+06 ... \n", - "2 298.000000 233.666667 271.166667 216.333333 3.198448e+06 ... \n", - "3 265.333333 211.666667 226.666667 181.000000 3.198691e+06 ... \n", - "4 329.833333 280.666667 308.833333 271.833333 3.198978e+06 ... \n", - "\n", - " value_S17 value_S18 value_S19 value_S20 value_S21 value_S22 \\\n", - "0 10.383333 3.131958e+06 52.666667 54.333333 56.166667 61.000000 \n", - "1 8.666667 3.133668e+06 33.166667 37.000000 36.166667 43.666667 \n", - "2 8.833333 3.135413e+06 41.500000 45.666667 46.500000 49.666667 \n", - "3 8.433333 3.137001e+06 42.333333 42.833333 40.500000 44.166667 \n", - "4 9.083333 3.138843e+06 50.500000 51.166667 55.500000 53.666667 \n", - "\n", - " value_S23 value_S24 value_S25 value_S26 \n", - "0 47.666667 52.666667 40.833333 357.333333 \n", - "1 34.500000 39.333333 31.166667 249.666667 \n", - "2 39.333333 45.500000 36.166667 297.666667 \n", - "3 35.333333 37.833333 30.333333 268.000000 \n", - "4 46.166667 49.666667 41.166667 341.833333 \n", - "\n", - "[5 rows x 28 columns]" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['readings'].head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## pandas.DataFrame.pop\n", - "\n", - "* Input: readings (unstacked)\n", - "* Output: readings (without turbine_id), turbine_id\n", - "* Effect: turbine_id has been popped from readings" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "step = 2\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'X', 'y', 'turbine_id'])" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 T001\n", - "1 T001\n", - "2 T001\n", - "3 T001\n", - "4 T001\n", - "Name: turbine_id, dtype: object" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['turbine_id'].head()" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
timestampvalue_S01value_S02value_S03value_S04value_S05value_S06value_S07value_S08value_S09...value_S17value_S18value_S19value_S20value_S21value_S22value_S23value_S24value_S25value_S26
02013-01-10 00:00:00313.333333323.833333336.000000364.666667286.500000314.000000243.1666673.197980e+06695143.166667...10.3833333.131958e+0652.66666754.33333356.16666761.00000047.66666752.66666740.833333357.333333
12013-01-10 01:00:00197.500000221.333333216.000000260.666667206.833333235.833333186.6666673.198221e+06695403.666667...8.6666673.133668e+0633.16666737.00000036.16666743.66666734.50000039.33333331.166667249.666667
22013-01-10 02:00:00248.166667271.666667277.500000298.000000233.666667271.166667216.3333333.198448e+06695656.500000...8.8333333.135413e+0641.50000045.66666746.50000049.66666739.33333345.50000036.166667297.666667
32013-01-10 03:00:00253.166667256.166667242.666667265.333333211.666667226.666667181.0000003.198691e+06695911.333333...8.4333333.137001e+0642.33333342.83333340.50000044.16666735.33333337.83333330.333333268.000000
42013-01-10 04:00:00305.000000312.333333346.166667329.833333280.666667308.833333271.8333333.198978e+06696195.833333...9.0833333.138843e+0650.50000051.16666755.50000053.66666746.16666749.66666741.166667341.833333
\n", - "

5 rows × 27 columns

\n", - "
" - ], - "text/plain": [ - " timestamp value_S01 value_S02 value_S03 value_S04 \\\n", - "0 2013-01-10 00:00:00 313.333333 323.833333 336.000000 364.666667 \n", - "1 2013-01-10 01:00:00 197.500000 221.333333 216.000000 260.666667 \n", - "2 2013-01-10 02:00:00 248.166667 271.666667 277.500000 298.000000 \n", - "3 2013-01-10 03:00:00 253.166667 256.166667 242.666667 265.333333 \n", - "4 2013-01-10 04:00:00 305.000000 312.333333 346.166667 329.833333 \n", - "\n", - " value_S05 value_S06 value_S07 value_S08 value_S09 ... \\\n", - "0 286.500000 314.000000 243.166667 3.197980e+06 695143.166667 ... \n", - "1 206.833333 235.833333 186.666667 3.198221e+06 695403.666667 ... \n", - "2 233.666667 271.166667 216.333333 3.198448e+06 695656.500000 ... \n", - "3 211.666667 226.666667 181.000000 3.198691e+06 695911.333333 ... \n", - "4 280.666667 308.833333 271.833333 3.198978e+06 696195.833333 ... \n", - "\n", - " value_S17 value_S18 value_S19 value_S20 value_S21 value_S22 \\\n", - "0 10.383333 3.131958e+06 52.666667 54.333333 56.166667 61.000000 \n", - "1 8.666667 3.133668e+06 33.166667 37.000000 36.166667 43.666667 \n", - "2 8.833333 3.135413e+06 41.500000 45.666667 46.500000 49.666667 \n", - "3 8.433333 3.137001e+06 42.333333 42.833333 40.500000 44.166667 \n", - "4 9.083333 3.138843e+06 50.500000 51.166667 55.500000 53.666667 \n", - "\n", - " value_S23 value_S24 value_S25 value_S26 \n", - "0 47.666667 52.666667 40.833333 357.333333 \n", - "1 34.500000 39.333333 31.166667 249.666667 \n", - "2 39.333333 45.500000 36.166667 297.666667 \n", - "3 35.333333 37.833333 30.333333 268.000000 \n", - "4 46.166667 49.666667 41.166667 341.833333 \n", - "\n", - "[5 rows x 27 columns]" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['readings'].head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## pandas.DataFrame.pop\n", - "\n", - "* Input: readings (without turbine_id)\n", - "* Output: readings (without timestamp), timestamp\n", - "* Effect: timestamp has been popped from readings" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "step = 3\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'turbine_id', 'X', 'y', 'timestamp'])" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 2013-01-10 00:00:00\n", - "1 2013-01-10 01:00:00\n", - "2 2013-01-10 02:00:00\n", - "3 2013-01-10 03:00:00\n", - "4 2013-01-10 04:00:00\n", - "Name: timestamp, dtype: datetime64[ns]" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['timestamp'].head()" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
value_S01value_S02value_S03value_S04value_S05value_S06value_S07value_S08value_S09value_S10...value_S17value_S18value_S19value_S20value_S21value_S22value_S23value_S24value_S25value_S26
0313.333333323.833333336.000000364.666667286.500000314.000000243.1666673.197980e+06695143.1666673.348384e+06...10.3833333.131958e+0652.66666754.33333356.16666761.00000047.66666752.66666740.833333357.333333
1197.500000221.333333216.000000260.666667206.833333235.833333186.6666673.198221e+06695403.6666673.348651e+06...8.6666673.133668e+0633.16666737.00000036.16666743.66666734.50000039.33333331.166667249.666667
2248.166667271.666667277.500000298.000000233.666667271.166667216.3333333.198448e+06695656.5000003.348910e+06...8.8333333.135413e+0641.50000045.66666746.50000049.66666739.33333345.50000036.166667297.666667
3253.166667256.166667242.666667265.333333211.666667226.666667181.0000003.198691e+06695911.3333333.349157e+06...8.4333333.137001e+0642.33333342.83333340.50000044.16666735.33333337.83333330.333333268.000000
4305.000000312.333333346.166667329.833333280.666667308.833333271.8333333.198978e+06696195.8333333.349452e+06...9.0833333.138843e+0650.50000051.16666755.50000053.66666746.16666749.66666741.166667341.833333
\n", - "

5 rows × 26 columns

\n", - "
" - ], - "text/plain": [ - " value_S01 value_S02 value_S03 value_S04 value_S05 value_S06 \\\n", - "0 313.333333 323.833333 336.000000 364.666667 286.500000 314.000000 \n", - "1 197.500000 221.333333 216.000000 260.666667 206.833333 235.833333 \n", - "2 248.166667 271.666667 277.500000 298.000000 233.666667 271.166667 \n", - "3 253.166667 256.166667 242.666667 265.333333 211.666667 226.666667 \n", - "4 305.000000 312.333333 346.166667 329.833333 280.666667 308.833333 \n", - "\n", - " value_S07 value_S08 value_S09 value_S10 ... value_S17 \\\n", - "0 243.166667 3.197980e+06 695143.166667 3.348384e+06 ... 10.383333 \n", - "1 186.666667 3.198221e+06 695403.666667 3.348651e+06 ... 8.666667 \n", - "2 216.333333 3.198448e+06 695656.500000 3.348910e+06 ... 8.833333 \n", - "3 181.000000 3.198691e+06 695911.333333 3.349157e+06 ... 8.433333 \n", - "4 271.833333 3.198978e+06 696195.833333 3.349452e+06 ... 9.083333 \n", - "\n", - " value_S18 value_S19 value_S20 value_S21 value_S22 value_S23 \\\n", - "0 3.131958e+06 52.666667 54.333333 56.166667 61.000000 47.666667 \n", - "1 3.133668e+06 33.166667 37.000000 36.166667 43.666667 34.500000 \n", - "2 3.135413e+06 41.500000 45.666667 46.500000 49.666667 39.333333 \n", - "3 3.137001e+06 42.333333 42.833333 40.500000 44.166667 35.333333 \n", - "4 3.138843e+06 50.500000 51.166667 55.500000 53.666667 46.166667 \n", - "\n", - " value_S24 value_S25 value_S26 \n", - "0 52.666667 40.833333 357.333333 \n", - "1 39.333333 31.166667 249.666667 \n", - "2 45.500000 36.166667 297.666667 \n", - "3 37.833333 30.333333 268.000000 \n", - "4 49.666667 41.166667 341.833333 \n", - "\n", - "[5 rows x 26 columns]" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['readings'].head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## sklearn.impute.SimpleImputer\n", - "\n", - "* Input: readings (unstacked, no turbine_id, no timestamp)\n", - "* Output: readings (imputed, numpy array)\n", - "* Effect: readings have been imputed and converted to numpy array" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "step = 4\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[3.13333333e+02, 3.23833333e+02, 3.36000000e+02, 3.64666667e+02,\n", - " 2.86500000e+02, 3.14000000e+02, 2.43166667e+02, 3.19798000e+06,\n", - " 6.95143167e+05, 3.34838383e+06, 3.43692150e+06, 3.32248667e+06,\n", - " 3.35809000e+06, 3.22390150e+06, 7.95000000e+00, 5.85000000e+00,\n", - " 1.03833333e+01, 3.13195833e+06, 5.26666667e+01, 5.43333333e+01,\n", - " 5.61666667e+01, 6.10000000e+01, 4.76666667e+01, 5.26666667e+01,\n", - " 4.08333333e+01, 3.57333333e+02],\n", - " [1.97500000e+02, 2.21333333e+02, 2.16000000e+02, 2.60666667e+02,\n", - " 2.06833333e+02, 2.35833333e+02, 1.86666667e+02, 3.19822067e+06,\n", - " 6.95403667e+05, 3.34865117e+06, 3.43722283e+06, 3.32272200e+06,\n", - " 3.35834000e+06, 3.22409567e+06, 6.83333333e+00, 5.15000000e+00,\n", - " 8.66666667e+00, 3.13366817e+06, 
3.31666667e+01, 3.70000000e+01,\n", - " 3.61666667e+01, 4.36666667e+01, 3.45000000e+01, 3.93333333e+01,\n", - " 3.11666667e+01, 2.49666667e+02],\n", - " [2.48166667e+02, 2.71666667e+02, 2.77500000e+02, 2.98000000e+02,\n", - " 2.33666667e+02, 2.71166667e+02, 2.16333333e+02, 3.19844767e+06,\n", - " 6.95656500e+05, 3.34890967e+06, 3.43751900e+06, 3.32295950e+06,\n", - " 3.35862067e+06, 3.22432333e+06, 7.11666667e+00, 5.56666667e+00,\n", - " 8.83333333e+00, 3.13541283e+06, 4.15000000e+01, 4.56666667e+01,\n", - " 4.65000000e+01, 4.96666667e+01, 3.93333333e+01, 4.55000000e+01,\n", - " 3.61666667e+01, 2.97666667e+02],\n", - " [2.53166667e+02, 2.56166667e+02, 2.42666667e+02, 2.65333333e+02,\n", - " 2.11666667e+02, 2.26666667e+02, 1.81000000e+02, 3.19869117e+06,\n", - " 6.95911333e+05, 3.34915717e+06, 3.43778050e+06, 3.32316850e+06,\n", - " 3.35884883e+06, 3.22450217e+06, 6.71666667e+00, 5.16666667e+00,\n", - " 8.43333333e+00, 3.13700133e+06, 4.23333333e+01, 4.28333333e+01,\n", - " 4.05000000e+01, 4.41666667e+01, 3.53333333e+01, 3.78333333e+01,\n", - " 3.03333333e+01, 2.68000000e+02],\n", - " [3.05000000e+02, 3.12333333e+02, 3.46166667e+02, 3.29833333e+02,\n", - " 2.80666667e+02, 3.08833333e+02, 2.71833333e+02, 3.19897850e+06,\n", - " 6.96195833e+05, 3.34945200e+06, 3.43807767e+06, 3.32340933e+06,\n", - " 3.35910983e+06, 3.22471400e+06, 7.20000000e+00, 5.28333333e+00,\n", - " 9.08333333e+00, 3.13884333e+06, 5.05000000e+01, 5.11666667e+01,\n", - " 5.55000000e+01, 5.36666667e+01, 4.61666667e+01, 4.96666667e+01,\n", - " 4.11666667e+01, 3.41833333e+02]])" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['readings'][0:5]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## sklearn.preprocessing.MinMaxScaler\n", - "\n", - "* Input: (imputed, array)\n", - "* Output: readings (scaled, array)\n", - "* Effect: readings have been scaled to [-1, 1] range" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [], - "source": [ - "step = 5\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[-0.26126126, -0.23706897, -0.20870076, -0.14106583, -0.32328767,\n", - " -0.25969448, -0.42198789, -1. , -1. , -1. ,\n", - " -1. , -1. , -1. , -1. 
, -0.11007463,\n", - " -0.16824645, -0.10424155, -0.37397741, -0.25233645, -0.22716628,\n", - " -0.20140515, -0.13481829, -0.32239156, -0.25380117, -0.4182243 ,\n", - " -0.25697453],\n", - " [-0.53349001, -0.47805643, -0.49088771, -0.38557994, -0.51037182,\n", - " -0.44339992, -0.55438391, -0.99983031, -0.99982547, -0.99982499,\n", - " -0.99980741, -0.9998428 , -0.99983779, -0.99986887, -0.23507463,\n", - " -0.26777251, -0.25233645, -0.37363511, -0.52570093, -0.470726 ,\n", - " -0.4824356 , -0.37866354, -0.50762016, -0.44093567, -0.55373832,\n", - " -0.48085254],\n", - " [-0.41441441, -0.35971787, -0.3462669 , -0.29780564, -0.44735812,\n", - " -0.36036036, -0.48486624, -0.99967026, -0.99965608, -0.99965576,\n", - " -0.99961813, -0.99968416, -0.99965569, -0.99971512, -0.20335821,\n", - " -0.20853081, -0.2379583 , -0.37328583, -0.4088785 , -0.34894614,\n", - " -0.33723653, -0.29425557, -0.43962485, -0.35438596, -0.48364486,\n", - " -0.38104315],\n", - " [-0.40266353, -0.39615987, -0.4281795 , -0.37460815, -0.49902153,\n", - " -0.4649432 , -0.56766257, -0.99949857, -0.99948535, -0.99949373,\n", - " -0.999451 , -0.99954455, -0.99950765, -0.99959435, -0.24813433,\n", - " -0.26540284, -0.27246585, -0.37296782, -0.39719626, -0.38875878,\n", - " -0.42154567, -0.37162954, -0.49589683, -0.4619883 , -0.56542056,\n", - " -0.4427309 ],\n", - " [-0.28084606, -0.26410658, -0.18479326, -0.22296238, -0.3369863 ,\n", - " -0.27183705, -0.35481351, -0.99929598, -0.99929474, -0.99930071,\n", - " -0.99926107, -0.99938368, -0.99933831, -0.9994513 , -0.19402985,\n", - " -0.24881517, -0.21639109, -0.37259906, -0.28271028, -0.27166276,\n", - " -0.21077283, -0.23798359, -0.34349355, -0.29590643, -0.4135514 ,\n", - " -0.28920464]])" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['readings'][0:5]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## pandas.DataFrame\n", - "\n", - "* Input: readings (scaled, array)\n", - "* Output: readings (dataframe)\n", - "* Effect: readings have been converted into a dataframe" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [], - "source": [ - "step = 6\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
0123456789...16171819202122232425
0-0.261261-0.237069-0.208701-0.141066-0.323288-0.259694-0.421988-1.000000-1.000000-1.000000...-0.104242-0.373977-0.252336-0.227166-0.201405-0.134818-0.322392-0.253801-0.418224-0.256975
1-0.533490-0.478056-0.490888-0.385580-0.510372-0.443400-0.554384-0.999830-0.999825-0.999825...-0.252336-0.373635-0.525701-0.470726-0.482436-0.378664-0.507620-0.440936-0.553738-0.480853
2-0.414414-0.359718-0.346267-0.297806-0.447358-0.360360-0.484866-0.999670-0.999656-0.999656...-0.237958-0.373286-0.408879-0.348946-0.337237-0.294256-0.439625-0.354386-0.483645-0.381043
3-0.402664-0.396160-0.428180-0.374608-0.499022-0.464943-0.567663-0.999499-0.999485-0.999494...-0.272466-0.372968-0.397196-0.388759-0.421546-0.371630-0.495897-0.461988-0.565421-0.442731
4-0.280846-0.264107-0.184793-0.222962-0.336986-0.271837-0.354814-0.999296-0.999295-0.999301...-0.216391-0.372599-0.282710-0.271663-0.210773-0.237984-0.343494-0.295906-0.413551-0.289205
\n", - "

5 rows × 26 columns

\n", - "
" - ], - "text/plain": [ - " 0 1 2 3 4 5 6 \\\n", - "0 -0.261261 -0.237069 -0.208701 -0.141066 -0.323288 -0.259694 -0.421988 \n", - "1 -0.533490 -0.478056 -0.490888 -0.385580 -0.510372 -0.443400 -0.554384 \n", - "2 -0.414414 -0.359718 -0.346267 -0.297806 -0.447358 -0.360360 -0.484866 \n", - "3 -0.402664 -0.396160 -0.428180 -0.374608 -0.499022 -0.464943 -0.567663 \n", - "4 -0.280846 -0.264107 -0.184793 -0.222962 -0.336986 -0.271837 -0.354814 \n", - "\n", - " 7 8 9 ... 16 17 18 19 \\\n", - "0 -1.000000 -1.000000 -1.000000 ... -0.104242 -0.373977 -0.252336 -0.227166 \n", - "1 -0.999830 -0.999825 -0.999825 ... -0.252336 -0.373635 -0.525701 -0.470726 \n", - "2 -0.999670 -0.999656 -0.999656 ... -0.237958 -0.373286 -0.408879 -0.348946 \n", - "3 -0.999499 -0.999485 -0.999494 ... -0.272466 -0.372968 -0.397196 -0.388759 \n", - "4 -0.999296 -0.999295 -0.999301 ... -0.216391 -0.372599 -0.282710 -0.271663 \n", - "\n", - " 20 21 22 23 24 25 \n", - "0 -0.201405 -0.134818 -0.322392 -0.253801 -0.418224 -0.256975 \n", - "1 -0.482436 -0.378664 -0.507620 -0.440936 -0.553738 -0.480853 \n", - "2 -0.337237 -0.294256 -0.439625 -0.354386 -0.483645 -0.381043 \n", - "3 -0.421546 -0.371630 -0.495897 -0.461988 -0.565421 -0.442731 \n", - "4 -0.210773 -0.237984 -0.343494 -0.295906 -0.413551 -0.289205 \n", - "\n", - "[5 rows x 26 columns]" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['readings'].head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## pandas.DataFrame.set\n", - "\n", - "* Input: readings (dataframe)\n", - "* Output: readings (dataframe with turbine_id)\n", - "* Effect: turbine_id has been set as a readings column" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [], - "source": [ - "step = 7\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
0123456789...171819202122232425turbine_id
0-0.261261-0.237069-0.208701-0.141066-0.323288-0.259694-0.421988-1.000000-1.000000-1.000000...-0.373977-0.252336-0.227166-0.201405-0.134818-0.322392-0.253801-0.418224-0.256975T001
1-0.533490-0.478056-0.490888-0.385580-0.510372-0.443400-0.554384-0.999830-0.999825-0.999825...-0.373635-0.525701-0.470726-0.482436-0.378664-0.507620-0.440936-0.553738-0.480853T001
2-0.414414-0.359718-0.346267-0.297806-0.447358-0.360360-0.484866-0.999670-0.999656-0.999656...-0.373286-0.408879-0.348946-0.337237-0.294256-0.439625-0.354386-0.483645-0.381043T001
3-0.402664-0.396160-0.428180-0.374608-0.499022-0.464943-0.567663-0.999499-0.999485-0.999494...-0.372968-0.397196-0.388759-0.421546-0.371630-0.495897-0.461988-0.565421-0.442731T001
4-0.280846-0.264107-0.184793-0.222962-0.336986-0.271837-0.354814-0.999296-0.999295-0.999301...-0.372599-0.282710-0.271663-0.210773-0.237984-0.343494-0.295906-0.413551-0.289205T001
\n", - "

5 rows × 27 columns

\n", - "
" - ], - "text/plain": [ - " 0 1 2 3 4 5 6 \\\n", - "0 -0.261261 -0.237069 -0.208701 -0.141066 -0.323288 -0.259694 -0.421988 \n", - "1 -0.533490 -0.478056 -0.490888 -0.385580 -0.510372 -0.443400 -0.554384 \n", - "2 -0.414414 -0.359718 -0.346267 -0.297806 -0.447358 -0.360360 -0.484866 \n", - "3 -0.402664 -0.396160 -0.428180 -0.374608 -0.499022 -0.464943 -0.567663 \n", - "4 -0.280846 -0.264107 -0.184793 -0.222962 -0.336986 -0.271837 -0.354814 \n", - "\n", - " 7 8 9 ... 17 18 19 20 \\\n", - "0 -1.000000 -1.000000 -1.000000 ... -0.373977 -0.252336 -0.227166 -0.201405 \n", - "1 -0.999830 -0.999825 -0.999825 ... -0.373635 -0.525701 -0.470726 -0.482436 \n", - "2 -0.999670 -0.999656 -0.999656 ... -0.373286 -0.408879 -0.348946 -0.337237 \n", - "3 -0.999499 -0.999485 -0.999494 ... -0.372968 -0.397196 -0.388759 -0.421546 \n", - "4 -0.999296 -0.999295 -0.999301 ... -0.372599 -0.282710 -0.271663 -0.210773 \n", - "\n", - " 21 22 23 24 25 turbine_id \n", - "0 -0.134818 -0.322392 -0.253801 -0.418224 -0.256975 T001 \n", - "1 -0.378664 -0.507620 -0.440936 -0.553738 -0.480853 T001 \n", - "2 -0.294256 -0.439625 -0.354386 -0.483645 -0.381043 T001 \n", - "3 -0.371630 -0.495897 -0.461988 -0.565421 -0.442731 T001 \n", - "4 -0.237984 -0.343494 -0.295906 -0.413551 -0.289205 T001 \n", - "\n", - "[5 rows x 27 columns]" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['readings'].head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## pandas.DataFrame.set\n", - "\n", - "* Input: readings (dataframe with turbine_id)\n", - "* Output: readings (dataframe with turbine_id and timestamp)\n", - "* Effect: timestamp has been set as a readings column" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [], - "source": [ - "step = 8\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
0123456789...1819202122232425turbine_idtimestamp
0-0.261261-0.237069-0.208701-0.141066-0.323288-0.259694-0.421988-1.000000-1.000000-1.000000...-0.252336-0.227166-0.201405-0.134818-0.322392-0.253801-0.418224-0.256975T0012013-01-10 00:00:00
1-0.533490-0.478056-0.490888-0.385580-0.510372-0.443400-0.554384-0.999830-0.999825-0.999825...-0.525701-0.470726-0.482436-0.378664-0.507620-0.440936-0.553738-0.480853T0012013-01-10 01:00:00
2-0.414414-0.359718-0.346267-0.297806-0.447358-0.360360-0.484866-0.999670-0.999656-0.999656...-0.408879-0.348946-0.337237-0.294256-0.439625-0.354386-0.483645-0.381043T0012013-01-10 02:00:00
3-0.402664-0.396160-0.428180-0.374608-0.499022-0.464943-0.567663-0.999499-0.999485-0.999494...-0.397196-0.388759-0.421546-0.371630-0.495897-0.461988-0.565421-0.442731T0012013-01-10 03:00:00
4-0.280846-0.264107-0.184793-0.222962-0.336986-0.271837-0.354814-0.999296-0.999295-0.999301...-0.282710-0.271663-0.210773-0.237984-0.343494-0.295906-0.413551-0.289205T0012013-01-10 04:00:00
\n", - "

5 rows × 28 columns

\n", - "
" - ], - "text/plain": [ - " 0 1 2 3 4 5 6 \\\n", - "0 -0.261261 -0.237069 -0.208701 -0.141066 -0.323288 -0.259694 -0.421988 \n", - "1 -0.533490 -0.478056 -0.490888 -0.385580 -0.510372 -0.443400 -0.554384 \n", - "2 -0.414414 -0.359718 -0.346267 -0.297806 -0.447358 -0.360360 -0.484866 \n", - "3 -0.402664 -0.396160 -0.428180 -0.374608 -0.499022 -0.464943 -0.567663 \n", - "4 -0.280846 -0.264107 -0.184793 -0.222962 -0.336986 -0.271837 -0.354814 \n", - "\n", - " 7 8 9 ... 18 19 20 21 \\\n", - "0 -1.000000 -1.000000 -1.000000 ... -0.252336 -0.227166 -0.201405 -0.134818 \n", - "1 -0.999830 -0.999825 -0.999825 ... -0.525701 -0.470726 -0.482436 -0.378664 \n", - "2 -0.999670 -0.999656 -0.999656 ... -0.408879 -0.348946 -0.337237 -0.294256 \n", - "3 -0.999499 -0.999485 -0.999494 ... -0.397196 -0.388759 -0.421546 -0.371630 \n", - "4 -0.999296 -0.999295 -0.999301 ... -0.282710 -0.271663 -0.210773 -0.237984 \n", - "\n", - " 22 23 24 25 turbine_id timestamp \n", - "0 -0.322392 -0.253801 -0.418224 -0.256975 T001 2013-01-10 00:00:00 \n", - "1 -0.507620 -0.440936 -0.553738 -0.480853 T001 2013-01-10 01:00:00 \n", - "2 -0.439625 -0.354386 -0.483645 -0.381043 T001 2013-01-10 02:00:00 \n", - "3 -0.495897 -0.461988 -0.565421 -0.442731 T001 2013-01-10 03:00:00 \n", - "4 -0.343494 -0.295906 -0.413551 -0.289205 T001 2013-01-10 04:00:00 \n", - "\n", - "[5 rows x 28 columns]" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['readings'].head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences\n", - "\n", - "* Input: X, readings (dataframe with turbine_id and timestamp)\n", - "* Output: X\n", - "* Effect: X has been converted to a 3d numpy array that contains 1 matrix of shape\n", - " (window_size x num_signals) for each one of the target times." 
- ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'window_size': 24, 'cutoff_time': 'cutoff_time', 'time_index': 'timestamp'}" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pipeline._pipeline.get_hyperparameters()[\n", - " 'mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1']" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [], - "source": [ - "step = 9\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(8521, 28)" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['readings'].shape" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(353,)" - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['y'].shape" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(353, 24, 26)" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['X'].shape" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": { - "scrolled": true - }, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[-0.58793576, -0.60305643, -0.63981971, -0.61481191, -0.69823875,\n", - " -0.65021543, -0.68912322, -0.99436914, -0.99439755, -0.99454249,\n", - " -0.99446788, -0.99476185, -0.99490997, -0.99529511, -0.34701493,\n", - " -0.33886256, -0.33860532, -0.36301186, -0.57943925, -0.59250585,\n", - " -0.6323185 , -0.60609613, -0.69284877, -0.64444444, -0.68691589,\n", - " -0.63853752],\n", - " [-0.56600078, -0.5846395 , -0.63002156, -0.61559561, -0.70880626,\n", - " -0.66392479, -0.69732474, -0.9942427 , -0.99427986, -0.9944408 ,\n", - " -0.99436498, -0.99468147, -0.99482011, -0.99521249, -0.33955224,\n", - " -0.31516588, -0.38892883, -0.36280656, -0.55841121, -0.57611241,\n", - " -0.62295082, -0.61078546, -0.70222743, -0.65847953, -0.69392523,\n", - " -0.63645815],\n", - " [-0.64081473, -0.64184953, -0.67038997, -0.63597179, -0.71350294,\n", - " -0.65844105, -0.66764304, -0.99412236, -0.99416864, -0.99434228,\n", - " -0.99426059, -0.99459663, -0.99472365, -0.99511795, -0.34328358,\n", - " -0.30094787, -0.36304817, -0.36259859, -0.63317757, -0.6323185 ,\n", - " -0.66042155, -0.62954279, -0.70926143, -0.65380117, -0.66588785,\n", - " -0.66002426],\n", - " [-0.73678026, -0.72139498, -0.72800314, -0.69239812, -0.71350294,\n", - " -0.68233451, -0.69732474, -0.99403811, -0.99408512, -0.9942623 ,\n", - " -0.99417111, -0.99451525, -0.99463206, -0.9950315 , -0.40671642,\n", - " -0.36018957, -0.44644141, -0.36242395, -0.72897196, -0.71194379,\n", - " -0.71896956, -0.68347011, -0.70926143, -0.6748538 , -0.69392523,\n", - " -0.71027552],\n", - " [-0.75401488, -0.74333856, -0.75112679, 
-0.71590909, -0.76555773,\n", - " -0.73599687, -0.75278266, -0.99395808, -0.99400684, -0.99419094,\n", - " -0.99409367, -0.99444556, -0.99455517, -0.99495418, -0.43656716,\n", - " -0.3957346 , -0.465133 , -0.36226933, -0.7453271 , -0.735363 ,\n", - " -0.74004684, -0.70926143, -0.76084408, -0.73099415, -0.75 ,\n", - " -0.7463178 ],\n", - " [-0.79866823, -0.76684953, -0.7558299 , -0.72688088, -0.76125245,\n", - " -0.75714845, -0.78363601, -0.99389098, -0.99393583, -0.99411958,\n", - " -0.99401538, -0.99437709, -0.99448423, -0.99489036, -0.43843284,\n", - " -0.37914692, -0.49388929, -0.36212623, -0.78971963, -0.75644028,\n", - " -0.7470726 , -0.72098476, -0.75615475, -0.7497076 , -0.78037383,\n", - " -0.76572518],\n", - " [-0.84919702, -0.83855799, -0.82245738, -0.78134796, -0.75225049,\n", - " -0.70661966, -0.65787932, -0.99384186, -0.99388279, -0.9940635 ,\n", - " -0.99395157, -0.9943113 , -0.99441264, -0.99481202, -0.51679104,\n", - " -0.50473934, -0.53414809, -0.36199904, -0.8411215 , -0.83138173,\n", - " -0.81264637, -0.77256741, -0.74677608, -0.70292398, -0.65654206,\n", - " -0.77438919],\n", - " [-0.69134352, -0.705721 , -0.73584166, -0.70297806, -0.75225049,\n", - " -0.72659616, -0.71724273, -0.99377229, -0.99381646, -0.99400032,\n", - " -0.99387925, -0.99423682, -0.99433003, -0.99471624, -0.43843284,\n", - " -0.40521327, -0.48094896, -0.36184615, -0.68457944, -0.69555035,\n", - " -0.72599532, -0.6975381 , -0.74677608, -0.71929825, -0.71261682,\n", - " -0.71893953],\n", - " [-0.84488837, -0.82915361, -0.83578287, -0.81896552, -0.86105675,\n", - " -0.8613396 , -0.86330795, -0.99369779, -0.99374656, -0.99393715,\n", - " -0.99381182, -0.99418494, -0.99427639, -0.99466379, -0.49253731,\n", - " -0.48104265, -0.51545651, -0.36172116, -0.8364486 , -0.81967213,\n", - " -0.82435597, -0.81008206, -0.85463072, -0.85497076, -0.86214953,\n", - " -0.84889967],\n", - " [-0.77908343, -0.78761755, -0.78757594, -0.78918495, -0.82348337,\n", - " -0.82491187, -0.85276313, -0.99365725, -0.99370625, -0.99389819,\n", - " -0.99377113, -0.99415254, -0.99424222, -0.99463329, -0.52798507,\n", - " -0.6042654 , -0.51545651, -0.36164779, -0.77336449, -0.77985948,\n", - " -0.78220141, -0.78429074, -0.86635404, -0.82222222, -0.85046729,\n", - " -0.81562987],\n", - " [-0.70544458, -0.64733542, -0.64844209, -0.61833856, -0.6481409 ,\n", - " -0.66392479, -0.71646163, -0.99356747, -0.99360832, -0.99380327,\n", - " -0.99367558, -0.99407272, -0.99415647, -0.99456035, -0.36567164,\n", - " -0.4549763 , -0.34291876, -0.36146698, -0.70560748, -0.63934426,\n", - " -0.63934426, -0.62016413, -0.64830012, -0.65847953, -0.72663551,\n", - " -0.66868827],\n", - " [-0.70387779, -0.67202194, -0.69508132, -0.72413793, -0.73228963,\n", - " -0.72816295, -0.72310096, -0.99348204, -0.99351955, -0.99372023,\n", - " -0.99359367, -0.99399256, -0.99407882, -0.99449203, -0.38432836,\n", - " -0.58530806, -0.33141625, -0.36130226, -0.69392523, -0.66042155,\n", - " -0.68384075, -0.71629543, -0.72801876, -0.72163743, -0.72196262,\n", - " -0.7113152 ],\n", - " [-0.8515472 , -0.81073668, -0.776602 , -0.76724138, -0.78277886,\n", - " -0.75832354, -0.74262839, -0.99341682, -0.99344607, -0.99364669,\n", - " -0.99352762, -0.99392743, -0.99401037, -0.99441763, -0.44029851,\n", - " -0.5521327 , -0.38461538, -0.36116102, -0.84345794, -0.80327869,\n", - " -0.76814988, -0.76084408, -0.77725674, -0.75204678, -0.73831776,\n", - " -0.7865188 ],\n", - " [-0.80258519, -0.83659875, -0.83499902, -0.79741379, -0.80821918,\n", - " -0.81629456, -0.79379028, 
-0.99336347, -0.99339091, -0.99358745,\n", - " -0.99346147, -0.9938642 , -0.99394733, -0.99434605, -0.44962687,\n", - " -0.6563981 , -0.34579439, -0.36103606, -0.79439252, -0.82669789,\n", - " -0.82669789, -0.78898007, -0.80304807, -0.81052632, -0.79205607,\n", - " -0.81632299],\n", - " [-0.83313749, -0.87539185, -0.90241035, -0.88440439, -0.86771037,\n", - " -0.87935762, -0.87580551, -0.99331764, -0.99335898, -0.99355602,\n", - " -0.99342259, -0.99382267, -0.99390959, -0.99430418, -0.54291045,\n", - " -0.72274882, -0.42918763, -0.36096002, -0.82943925, -0.87119438,\n", - " -0.89461358, -0.87573271, -0.86166471, -0.87134503, -0.87383178,\n", - " -0.88078323],\n", - " [-0.56678418, -0.60031348, -0.64295512, -0.78409091, -0.76164384,\n", - " -0.78535057, -0.82464362, -0.99321481, -0.99327557, -0.99349034,\n", - " -0.99337881, -0.9937915 , -0.99387347, -0.99427367, -0.32835821,\n", - " -0.47630332, -0.25808771, -0.36084678, -0.56074766, -0.59250585,\n", - " -0.6323185 , -0.77960141, -0.84759672, -0.78947368, -0.8364486 ,\n", - " -0.72621729],\n", - " [-0.77007442, -0.81230408, -0.83186361, -0.85540752, -0.85870841,\n", - " -0.86486486, -0.847686 , -0.99311634, -0.99319338, -0.99341516,\n", - " -0.99332651, -0.99374196, -0.99381551, -0.99422246, -0.46641791,\n", - " -0.65165877, -0.39324227, -0.36071245, -0.76168224, -0.80093677,\n", - " -0.82201405, -0.84759672, -0.85463072, -0.85730994, -0.84579439,\n", - " -0.83780974],\n", - " [-0.87622405, -0.92163009, -0.91377621, -0.89224138, -0.84540117,\n", - " -0.83431257, -0.82112869, -0.99306816, -0.99315821, -0.99338734,\n", - " -0.99329935, -0.99370611, -0.99377885, -0.9941789 , -0.55783582,\n", - " -0.65402844, -0.50970525, -0.36064058, -0.86682243, -0.91334895,\n", - " -0.90632319, -0.88745604, -0.84056272, -0.82923977, -0.81775701,\n", - " -0.87731762],\n", - " [-0.82843713, -0.83111285, -0.84166177, -0.8322884 , -0.84579256,\n", - " -0.8515472 , -0.86057411, -0.99302656, -0.99312426, -0.99335155,\n", - " -0.99325919, -0.99365991, -0.99373278, -0.99413129, -0.50559701,\n", - " -0.53791469, -0.52120776, -0.36055736, -0.82242991, -0.82201405,\n", - " -0.83138173, -0.82415006, -0.84056272, -0.84327485, -0.85747664,\n", - " -0.84508751],\n", - " [-0.74539757, -0.73824451, -0.76484421, -0.72100313, -0.73228963,\n", - " -0.70975323, -0.739504 , -0.99296569, -0.99306553, -0.99329699,\n", - " -0.9932005 , -0.99360224, -0.99367493, -0.99407862, -0.45149254,\n", - " -0.46208531, -0.48382459, -0.36044105, -0.73598131, -0.73067916,\n", - " -0.75644028, -0.71629543, -0.72801876, -0.70526316, -0.73831776,\n", - " -0.73696067],\n", - " [-0.40814728, -0.4596395 , -0.51087596, -0.46316614, -0.54598826,\n", - " -0.50607129, -0.57039641, -0.99283748, -0.99294147, -0.9931881 ,\n", - " -0.99308418, -0.99349681, -0.99356041, -0.99398047, -0.30597015,\n", - " -0.29383886, -0.34867002, -0.36020709, -0.46728972, -0.470726 ,\n", - " -0.5175644 , -0.48651817, -0.55685815, -0.51812865, -0.59579439,\n", - " -0.5179345 ],\n", - " [-0.47591069, -0.45219436, -0.48579267, -0.48981191, -0.57847358,\n", - " -0.54876616, -0.61882445, -0.99268659, -0.99280044, -0.99306033,\n", - " -0.99295359, -0.99338192, -0.99344287, -0.9938794 , -0.30223881,\n", - " -0.33649289, -0.32278936, -0.35994787, -0.49065421, -0.46370023,\n", - " -0.4941452 , -0.49589683, -0.58264947, -0.55321637, -0.62850467,\n", - " -0.53110379],\n", - " [-0.26792009, -0.27115987, -0.30080345, -0.24412226, -0.34246575,\n", - " -0.30434783, -0.40285101, -0.99250927, -0.99261854, -0.99288914,\n", - " -0.99278188, 
-0.99322495, -0.99327569, -0.9937324 , -0.22947761,\n", - " -0.28909953, -0.26096334, -0.35960139, -0.33878505, -0.29976581,\n", - " -0.32786885, -0.2919109 , -0.38100821, -0.32865497, -0.42523364,\n", - " -0.3394559 ],\n", - " [-0.31374853, -0.26449843, -0.2941407 , -0.23315047, -0.36516634,\n", - " -0.35957697, -0.44112478, -0.9923035 , -0.99241264, -0.99269787,\n", - " -0.99258055, -0.99304482, -0.99309553, -0.99356987, -0.2108209 ,\n", - " -0.21563981, -0.23652049, -0.35921021, -0.30607477, -0.26229508,\n", - " -0.29039813, -0.23563892, -0.35990621, -0.35204678, -0.43925234,\n", - " -0.32004852]])" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['X'][0]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## keras.Sequential.DoubleLSTMTimeSeriesClassifier\n", - "\n", - "* Input: X, y\n", - "* Output: \n", - "* Effect: DoubleLSTM has been fitted." - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "metadata": {}, - "outputs": [], - "source": [ - "step = 10\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.12" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/tutorials/pipelines/unstack_lstm_timeseries_classifier.ipynb b/tutorials/pipelines/unstack_lstm_timeseries_classifier.ipynb deleted file mode 100644 index 1a10480..0000000 --- a/tutorials/pipelines/unstack_lstm_timeseries_classifier.ipynb +++ /dev/null @@ -1,2355 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# unstack_lstm_timeseries_classifier" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "from draco.demo import load_demo\n", - "\n", - "target_times, readings = load_demo()" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "pipeline_name = 'classes.unstack_lstm_timeseries_classifier'" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "from draco.pipeline import DracoPipeline\n", - "\n", - "pipeline = DracoPipeline(pipeline_name)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['pandas.DataFrame.resample',\n", - " 'pandas.DataFrame.unstack',\n", - " 'pandas.DataFrame.pop',\n", - " 'pandas.DataFrame.pop',\n", - " 'sklearn.impute.SimpleImputer',\n", - " 'sklearn.preprocessing.MinMaxScaler',\n", - " 'pandas.DataFrame',\n", - " 'pandas.DataFrame.set',\n", - " 'pandas.DataFrame.set',\n", - " 'mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences',\n", - " 'keras.Sequential.LSTMTimeSeriesClassifier']" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pipeline.template['primitives']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Step by Step execution" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Input Data" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - 
"outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
turbine_idsignal_idtimestampvalue
0T001S012013-01-10323.0
1T001S022013-01-10320.0
2T001S032013-01-10284.0
3T001S042013-01-10348.0
4T001S052013-01-10273.0
\n", - "
" - ], - "text/plain": [ - " turbine_id signal_id timestamp value\n", - "0 T001 S01 2013-01-10 323.0\n", - "1 T001 S02 2013-01-10 320.0\n", - "2 T001 S03 2013-01-10 284.0\n", - "3 T001 S04 2013-01-10 348.0\n", - "4 T001 S05 2013-01-10 273.0" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "readings.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
turbine_idcutoff_timetarget
0T0012013-01-120
1T0012013-01-130
2T0012013-01-140
3T0012013-01-151
4T0012013-01-160
\n", - "
" - ], - "text/plain": [ - " turbine_id cutoff_time target\n", - "0 T001 2013-01-12 0\n", - "1 T001 2013-01-13 0\n", - "2 T001 2013-01-14 0\n", - "3 T001 2013-01-15 1\n", - "4 T001 2013-01-16 0" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "target_times.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Data Preparation (part of Draco Pipeline)\n", - "\n", - "* Input: target_times, readings, turbines\n", - "* Output: X, y, readings, turbines\n", - "* Effect: target_times has been split into X and y" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## pandas.DataFrame.resample\n", - "\n", - "* Input: readings\n", - "* Output: readings (resampled)\n", - "* Effect: readings have been resampled to the indicated resample rule and turbine_id,\n", - " signal_id and timestamp have been set as a multi-index" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "context = pipeline.fit(target_times, readings, output_=0)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'X', 'y'])" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
value
turbine_idsignal_idtimestamp
T001S012013-01-10 00:00:00313.333333
2013-01-10 01:00:00197.500000
2013-01-10 02:00:00248.166667
2013-01-10 03:00:00253.166667
2013-01-10 04:00:00305.000000
\n", - "
" - ], - "text/plain": [ - " value\n", - "turbine_id signal_id timestamp \n", - "T001 S01 2013-01-10 00:00:00 313.333333\n", - " 2013-01-10 01:00:00 197.500000\n", - " 2013-01-10 02:00:00 248.166667\n", - " 2013-01-10 03:00:00 253.166667\n", - " 2013-01-10 04:00:00 305.000000" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['readings'].head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## pandas.DataFrame.unstack\n", - "\n", - "* Input: readings (resampled)\n", - "* Output: readings (unstacked)\n", - "* Effect: readings have been unstacked" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "step = 1\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'X', 'y'])" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
turbine_idtimestampvalue_S01value_S02value_S03value_S04value_S05value_S06value_S07value_S08...value_S17value_S18value_S19value_S20value_S21value_S22value_S23value_S24value_S25value_S26
0T0012013-01-10 00:00:00313.333333323.833333336.000000364.666667286.500000314.000000243.1666673.197980e+06...10.3833333.131958e+0652.66666754.33333356.16666761.00000047.66666752.66666740.833333357.333333
1T0012013-01-10 01:00:00197.500000221.333333216.000000260.666667206.833333235.833333186.6666673.198221e+06...8.6666673.133668e+0633.16666737.00000036.16666743.66666734.50000039.33333331.166667249.666667
2T0012013-01-10 02:00:00248.166667271.666667277.500000298.000000233.666667271.166667216.3333333.198448e+06...8.8333333.135413e+0641.50000045.66666746.50000049.66666739.33333345.50000036.166667297.666667
3T0012013-01-10 03:00:00253.166667256.166667242.666667265.333333211.666667226.666667181.0000003.198691e+06...8.4333333.137001e+0642.33333342.83333340.50000044.16666735.33333337.83333330.333333268.000000
4T0012013-01-10 04:00:00305.000000312.333333346.166667329.833333280.666667308.833333271.8333333.198978e+06...9.0833333.138843e+0650.50000051.16666755.50000053.66666746.16666749.66666741.166667341.833333
\n", - "

5 rows × 28 columns

\n", - "
" - ], - "text/plain": [ - " turbine_id timestamp value_S01 value_S02 value_S03 \\\n", - "0 T001 2013-01-10 00:00:00 313.333333 323.833333 336.000000 \n", - "1 T001 2013-01-10 01:00:00 197.500000 221.333333 216.000000 \n", - "2 T001 2013-01-10 02:00:00 248.166667 271.666667 277.500000 \n", - "3 T001 2013-01-10 03:00:00 253.166667 256.166667 242.666667 \n", - "4 T001 2013-01-10 04:00:00 305.000000 312.333333 346.166667 \n", - "\n", - " value_S04 value_S05 value_S06 value_S07 value_S08 ... \\\n", - "0 364.666667 286.500000 314.000000 243.166667 3.197980e+06 ... \n", - "1 260.666667 206.833333 235.833333 186.666667 3.198221e+06 ... \n", - "2 298.000000 233.666667 271.166667 216.333333 3.198448e+06 ... \n", - "3 265.333333 211.666667 226.666667 181.000000 3.198691e+06 ... \n", - "4 329.833333 280.666667 308.833333 271.833333 3.198978e+06 ... \n", - "\n", - " value_S17 value_S18 value_S19 value_S20 value_S21 value_S22 \\\n", - "0 10.383333 3.131958e+06 52.666667 54.333333 56.166667 61.000000 \n", - "1 8.666667 3.133668e+06 33.166667 37.000000 36.166667 43.666667 \n", - "2 8.833333 3.135413e+06 41.500000 45.666667 46.500000 49.666667 \n", - "3 8.433333 3.137001e+06 42.333333 42.833333 40.500000 44.166667 \n", - "4 9.083333 3.138843e+06 50.500000 51.166667 55.500000 53.666667 \n", - "\n", - " value_S23 value_S24 value_S25 value_S26 \n", - "0 47.666667 52.666667 40.833333 357.333333 \n", - "1 34.500000 39.333333 31.166667 249.666667 \n", - "2 39.333333 45.500000 36.166667 297.666667 \n", - "3 35.333333 37.833333 30.333333 268.000000 \n", - "4 46.166667 49.666667 41.166667 341.833333 \n", - "\n", - "[5 rows x 28 columns]" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['readings'].head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## pandas.DataFrame.pop\n", - "\n", - "* Input: readings (unstacked)\n", - "* Output: readings (without turbine_id), turbine_id\n", - "* Effect: turbine_id has been popped from readings" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "step = 2\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'X', 'y', 'turbine_id'])" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 T001\n", - "1 T001\n", - "2 T001\n", - "3 T001\n", - "4 T001\n", - "Name: turbine_id, dtype: object" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['turbine_id'].head()" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
timestampvalue_S01value_S02value_S03value_S04value_S05value_S06value_S07value_S08value_S09...value_S17value_S18value_S19value_S20value_S21value_S22value_S23value_S24value_S25value_S26
02013-01-10 00:00:00313.333333323.833333336.000000364.666667286.500000314.000000243.1666673.197980e+06695143.166667...10.3833333.131958e+0652.66666754.33333356.16666761.00000047.66666752.66666740.833333357.333333
12013-01-10 01:00:00197.500000221.333333216.000000260.666667206.833333235.833333186.6666673.198221e+06695403.666667...8.6666673.133668e+0633.16666737.00000036.16666743.66666734.50000039.33333331.166667249.666667
22013-01-10 02:00:00248.166667271.666667277.500000298.000000233.666667271.166667216.3333333.198448e+06695656.500000...8.8333333.135413e+0641.50000045.66666746.50000049.66666739.33333345.50000036.166667297.666667
32013-01-10 03:00:00253.166667256.166667242.666667265.333333211.666667226.666667181.0000003.198691e+06695911.333333...8.4333333.137001e+0642.33333342.83333340.50000044.16666735.33333337.83333330.333333268.000000
42013-01-10 04:00:00305.000000312.333333346.166667329.833333280.666667308.833333271.8333333.198978e+06696195.833333...9.0833333.138843e+0650.50000051.16666755.50000053.66666746.16666749.66666741.166667341.833333
\n", - "

5 rows × 27 columns

\n", - "
" - ], - "text/plain": [ - " timestamp value_S01 value_S02 value_S03 value_S04 \\\n", - "0 2013-01-10 00:00:00 313.333333 323.833333 336.000000 364.666667 \n", - "1 2013-01-10 01:00:00 197.500000 221.333333 216.000000 260.666667 \n", - "2 2013-01-10 02:00:00 248.166667 271.666667 277.500000 298.000000 \n", - "3 2013-01-10 03:00:00 253.166667 256.166667 242.666667 265.333333 \n", - "4 2013-01-10 04:00:00 305.000000 312.333333 346.166667 329.833333 \n", - "\n", - " value_S05 value_S06 value_S07 value_S08 value_S09 ... \\\n", - "0 286.500000 314.000000 243.166667 3.197980e+06 695143.166667 ... \n", - "1 206.833333 235.833333 186.666667 3.198221e+06 695403.666667 ... \n", - "2 233.666667 271.166667 216.333333 3.198448e+06 695656.500000 ... \n", - "3 211.666667 226.666667 181.000000 3.198691e+06 695911.333333 ... \n", - "4 280.666667 308.833333 271.833333 3.198978e+06 696195.833333 ... \n", - "\n", - " value_S17 value_S18 value_S19 value_S20 value_S21 value_S22 \\\n", - "0 10.383333 3.131958e+06 52.666667 54.333333 56.166667 61.000000 \n", - "1 8.666667 3.133668e+06 33.166667 37.000000 36.166667 43.666667 \n", - "2 8.833333 3.135413e+06 41.500000 45.666667 46.500000 49.666667 \n", - "3 8.433333 3.137001e+06 42.333333 42.833333 40.500000 44.166667 \n", - "4 9.083333 3.138843e+06 50.500000 51.166667 55.500000 53.666667 \n", - "\n", - " value_S23 value_S24 value_S25 value_S26 \n", - "0 47.666667 52.666667 40.833333 357.333333 \n", - "1 34.500000 39.333333 31.166667 249.666667 \n", - "2 39.333333 45.500000 36.166667 297.666667 \n", - "3 35.333333 37.833333 30.333333 268.000000 \n", - "4 46.166667 49.666667 41.166667 341.833333 \n", - "\n", - "[5 rows x 27 columns]" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['readings'].head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## pandas.DataFrame.pop\n", - "\n", - "* Input: readings (without turbine_id)\n", - "* Output: readings (without timestamp), timestamp\n", - "* Effect: timestamp has been popped from readings" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "step = 3\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'turbine_id', 'X', 'y', 'timestamp'])" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 2013-01-10 00:00:00\n", - "1 2013-01-10 01:00:00\n", - "2 2013-01-10 02:00:00\n", - "3 2013-01-10 03:00:00\n", - "4 2013-01-10 04:00:00\n", - "Name: timestamp, dtype: datetime64[ns]" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['timestamp'].head()" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
value_S01value_S02value_S03value_S04value_S05value_S06value_S07value_S08value_S09value_S10...value_S17value_S18value_S19value_S20value_S21value_S22value_S23value_S24value_S25value_S26
0313.333333323.833333336.000000364.666667286.500000314.000000243.1666673.197980e+06695143.1666673.348384e+06...10.3833333.131958e+0652.66666754.33333356.16666761.00000047.66666752.66666740.833333357.333333
1197.500000221.333333216.000000260.666667206.833333235.833333186.6666673.198221e+06695403.6666673.348651e+06...8.6666673.133668e+0633.16666737.00000036.16666743.66666734.50000039.33333331.166667249.666667
2248.166667271.666667277.500000298.000000233.666667271.166667216.3333333.198448e+06695656.5000003.348910e+06...8.8333333.135413e+0641.50000045.66666746.50000049.66666739.33333345.50000036.166667297.666667
3253.166667256.166667242.666667265.333333211.666667226.666667181.0000003.198691e+06695911.3333333.349157e+06...8.4333333.137001e+0642.33333342.83333340.50000044.16666735.33333337.83333330.333333268.000000
4305.000000312.333333346.166667329.833333280.666667308.833333271.8333333.198978e+06696195.8333333.349452e+06...9.0833333.138843e+0650.50000051.16666755.50000053.66666746.16666749.66666741.166667341.833333
\n", - "

5 rows × 26 columns

\n", - "
" - ], - "text/plain": [ - " value_S01 value_S02 value_S03 value_S04 value_S05 value_S06 \\\n", - "0 313.333333 323.833333 336.000000 364.666667 286.500000 314.000000 \n", - "1 197.500000 221.333333 216.000000 260.666667 206.833333 235.833333 \n", - "2 248.166667 271.666667 277.500000 298.000000 233.666667 271.166667 \n", - "3 253.166667 256.166667 242.666667 265.333333 211.666667 226.666667 \n", - "4 305.000000 312.333333 346.166667 329.833333 280.666667 308.833333 \n", - "\n", - " value_S07 value_S08 value_S09 value_S10 ... value_S17 \\\n", - "0 243.166667 3.197980e+06 695143.166667 3.348384e+06 ... 10.383333 \n", - "1 186.666667 3.198221e+06 695403.666667 3.348651e+06 ... 8.666667 \n", - "2 216.333333 3.198448e+06 695656.500000 3.348910e+06 ... 8.833333 \n", - "3 181.000000 3.198691e+06 695911.333333 3.349157e+06 ... 8.433333 \n", - "4 271.833333 3.198978e+06 696195.833333 3.349452e+06 ... 9.083333 \n", - "\n", - " value_S18 value_S19 value_S20 value_S21 value_S22 value_S23 \\\n", - "0 3.131958e+06 52.666667 54.333333 56.166667 61.000000 47.666667 \n", - "1 3.133668e+06 33.166667 37.000000 36.166667 43.666667 34.500000 \n", - "2 3.135413e+06 41.500000 45.666667 46.500000 49.666667 39.333333 \n", - "3 3.137001e+06 42.333333 42.833333 40.500000 44.166667 35.333333 \n", - "4 3.138843e+06 50.500000 51.166667 55.500000 53.666667 46.166667 \n", - "\n", - " value_S24 value_S25 value_S26 \n", - "0 52.666667 40.833333 357.333333 \n", - "1 39.333333 31.166667 249.666667 \n", - "2 45.500000 36.166667 297.666667 \n", - "3 37.833333 30.333333 268.000000 \n", - "4 49.666667 41.166667 341.833333 \n", - "\n", - "[5 rows x 26 columns]" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['readings'].head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## sklearn.impute.SimpleImputer\n", - "\n", - "* Input: readings (unstacked, no turbine_id, no timestamp)\n", - "* Output: readings (imputed, numpy array)\n", - "* Effect: readings have been imputed and converted to numpy array" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "step = 4\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[3.13333333e+02, 3.23833333e+02, 3.36000000e+02, 3.64666667e+02,\n", - " 2.86500000e+02, 3.14000000e+02, 2.43166667e+02, 3.19798000e+06,\n", - " 6.95143167e+05, 3.34838383e+06, 3.43692150e+06, 3.32248667e+06,\n", - " 3.35809000e+06, 3.22390150e+06, 7.95000000e+00, 5.85000000e+00,\n", - " 1.03833333e+01, 3.13195833e+06, 5.26666667e+01, 5.43333333e+01,\n", - " 5.61666667e+01, 6.10000000e+01, 4.76666667e+01, 5.26666667e+01,\n", - " 4.08333333e+01, 3.57333333e+02],\n", - " [1.97500000e+02, 2.21333333e+02, 2.16000000e+02, 2.60666667e+02,\n", - " 2.06833333e+02, 2.35833333e+02, 1.86666667e+02, 3.19822067e+06,\n", - " 6.95403667e+05, 3.34865117e+06, 3.43722283e+06, 3.32272200e+06,\n", - " 3.35834000e+06, 3.22409567e+06, 6.83333333e+00, 5.15000000e+00,\n", - " 8.66666667e+00, 3.13366817e+06, 
3.31666667e+01, 3.70000000e+01,\n", - " 3.61666667e+01, 4.36666667e+01, 3.45000000e+01, 3.93333333e+01,\n", - " 3.11666667e+01, 2.49666667e+02],\n", - " [2.48166667e+02, 2.71666667e+02, 2.77500000e+02, 2.98000000e+02,\n", - " 2.33666667e+02, 2.71166667e+02, 2.16333333e+02, 3.19844767e+06,\n", - " 6.95656500e+05, 3.34890967e+06, 3.43751900e+06, 3.32295950e+06,\n", - " 3.35862067e+06, 3.22432333e+06, 7.11666667e+00, 5.56666667e+00,\n", - " 8.83333333e+00, 3.13541283e+06, 4.15000000e+01, 4.56666667e+01,\n", - " 4.65000000e+01, 4.96666667e+01, 3.93333333e+01, 4.55000000e+01,\n", - " 3.61666667e+01, 2.97666667e+02],\n", - " [2.53166667e+02, 2.56166667e+02, 2.42666667e+02, 2.65333333e+02,\n", - " 2.11666667e+02, 2.26666667e+02, 1.81000000e+02, 3.19869117e+06,\n", - " 6.95911333e+05, 3.34915717e+06, 3.43778050e+06, 3.32316850e+06,\n", - " 3.35884883e+06, 3.22450217e+06, 6.71666667e+00, 5.16666667e+00,\n", - " 8.43333333e+00, 3.13700133e+06, 4.23333333e+01, 4.28333333e+01,\n", - " 4.05000000e+01, 4.41666667e+01, 3.53333333e+01, 3.78333333e+01,\n", - " 3.03333333e+01, 2.68000000e+02],\n", - " [3.05000000e+02, 3.12333333e+02, 3.46166667e+02, 3.29833333e+02,\n", - " 2.80666667e+02, 3.08833333e+02, 2.71833333e+02, 3.19897850e+06,\n", - " 6.96195833e+05, 3.34945200e+06, 3.43807767e+06, 3.32340933e+06,\n", - " 3.35910983e+06, 3.22471400e+06, 7.20000000e+00, 5.28333333e+00,\n", - " 9.08333333e+00, 3.13884333e+06, 5.05000000e+01, 5.11666667e+01,\n", - " 5.55000000e+01, 5.36666667e+01, 4.61666667e+01, 4.96666667e+01,\n", - " 4.11666667e+01, 3.41833333e+02]])" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['readings'][0:5]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## sklearn.preprocessing.MinMaxScaler\n", - "\n", - "* Input: (imputed, array)\n", - "* Output: readings (scaled, array)\n", - "* Effect: readings have been scaled to [-1, 1] range" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [], - "source": [ - "step = 5\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" - ] - }, - "execution_count": 25, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[-0.26126126, -0.23706897, -0.20870076, -0.14106583, -0.32328767,\n", - " -0.25969448, -0.42198789, -1. , -1. , -1. ,\n", - " -1. , -1. , -1. , -1. 
, -0.11007463,\n", - " -0.16824645, -0.10424155, -0.37397741, -0.25233645, -0.22716628,\n", - " -0.20140515, -0.13481829, -0.32239156, -0.25380117, -0.4182243 ,\n", - " -0.25697453],\n", - " [-0.53349001, -0.47805643, -0.49088771, -0.38557994, -0.51037182,\n", - " -0.44339992, -0.55438391, -0.99983031, -0.99982547, -0.99982499,\n", - " -0.99980741, -0.9998428 , -0.99983779, -0.99986887, -0.23507463,\n", - " -0.26777251, -0.25233645, -0.37363511, -0.52570093, -0.470726 ,\n", - " -0.4824356 , -0.37866354, -0.50762016, -0.44093567, -0.55373832,\n", - " -0.48085254],\n", - " [-0.41441441, -0.35971787, -0.3462669 , -0.29780564, -0.44735812,\n", - " -0.36036036, -0.48486624, -0.99967026, -0.99965608, -0.99965576,\n", - " -0.99961813, -0.99968416, -0.99965569, -0.99971512, -0.20335821,\n", - " -0.20853081, -0.2379583 , -0.37328583, -0.4088785 , -0.34894614,\n", - " -0.33723653, -0.29425557, -0.43962485, -0.35438596, -0.48364486,\n", - " -0.38104315],\n", - " [-0.40266353, -0.39615987, -0.4281795 , -0.37460815, -0.49902153,\n", - " -0.4649432 , -0.56766257, -0.99949857, -0.99948535, -0.99949373,\n", - " -0.999451 , -0.99954455, -0.99950765, -0.99959435, -0.24813433,\n", - " -0.26540284, -0.27246585, -0.37296782, -0.39719626, -0.38875878,\n", - " -0.42154567, -0.37162954, -0.49589683, -0.4619883 , -0.56542056,\n", - " -0.4427309 ],\n", - " [-0.28084606, -0.26410658, -0.18479326, -0.22296238, -0.3369863 ,\n", - " -0.27183705, -0.35481351, -0.99929598, -0.99929474, -0.99930071,\n", - " -0.99926107, -0.99938368, -0.99933831, -0.9994513 , -0.19402985,\n", - " -0.24881517, -0.21639109, -0.37259906, -0.28271028, -0.27166276,\n", - " -0.21077283, -0.23798359, -0.34349355, -0.29590643, -0.4135514 ,\n", - " -0.28920464]])" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['readings'][0:5]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## pandas.DataFrame\n", - "\n", - "* Input: readings (scaled, array)\n", - "* Output: readings (dataframe)\n", - "* Effect: readings have been converted into a dataframe" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "metadata": {}, - "outputs": [], - "source": [ - "step = 6\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
0123456789...16171819202122232425
0-0.261261-0.237069-0.208701-0.141066-0.323288-0.259694-0.421988-1.000000-1.000000-1.000000...-0.104242-0.373977-0.252336-0.227166-0.201405-0.134818-0.322392-0.253801-0.418224-0.256975
1-0.533490-0.478056-0.490888-0.385580-0.510372-0.443400-0.554384-0.999830-0.999825-0.999825...-0.252336-0.373635-0.525701-0.470726-0.482436-0.378664-0.507620-0.440936-0.553738-0.480853
2-0.414414-0.359718-0.346267-0.297806-0.447358-0.360360-0.484866-0.999670-0.999656-0.999656...-0.237958-0.373286-0.408879-0.348946-0.337237-0.294256-0.439625-0.354386-0.483645-0.381043
3-0.402664-0.396160-0.428180-0.374608-0.499022-0.464943-0.567663-0.999499-0.999485-0.999494...-0.272466-0.372968-0.397196-0.388759-0.421546-0.371630-0.495897-0.461988-0.565421-0.442731
4-0.280846-0.264107-0.184793-0.222962-0.336986-0.271837-0.354814-0.999296-0.999295-0.999301...-0.216391-0.372599-0.282710-0.271663-0.210773-0.237984-0.343494-0.295906-0.413551-0.289205
\n", - "

5 rows × 26 columns

\n", - "
" - ], - "text/plain": [ - " 0 1 2 3 4 5 6 \\\n", - "0 -0.261261 -0.237069 -0.208701 -0.141066 -0.323288 -0.259694 -0.421988 \n", - "1 -0.533490 -0.478056 -0.490888 -0.385580 -0.510372 -0.443400 -0.554384 \n", - "2 -0.414414 -0.359718 -0.346267 -0.297806 -0.447358 -0.360360 -0.484866 \n", - "3 -0.402664 -0.396160 -0.428180 -0.374608 -0.499022 -0.464943 -0.567663 \n", - "4 -0.280846 -0.264107 -0.184793 -0.222962 -0.336986 -0.271837 -0.354814 \n", - "\n", - " 7 8 9 ... 16 17 18 19 \\\n", - "0 -1.000000 -1.000000 -1.000000 ... -0.104242 -0.373977 -0.252336 -0.227166 \n", - "1 -0.999830 -0.999825 -0.999825 ... -0.252336 -0.373635 -0.525701 -0.470726 \n", - "2 -0.999670 -0.999656 -0.999656 ... -0.237958 -0.373286 -0.408879 -0.348946 \n", - "3 -0.999499 -0.999485 -0.999494 ... -0.272466 -0.372968 -0.397196 -0.388759 \n", - "4 -0.999296 -0.999295 -0.999301 ... -0.216391 -0.372599 -0.282710 -0.271663 \n", - "\n", - " 20 21 22 23 24 25 \n", - "0 -0.201405 -0.134818 -0.322392 -0.253801 -0.418224 -0.256975 \n", - "1 -0.482436 -0.378664 -0.507620 -0.440936 -0.553738 -0.480853 \n", - "2 -0.337237 -0.294256 -0.439625 -0.354386 -0.483645 -0.381043 \n", - "3 -0.421546 -0.371630 -0.495897 -0.461988 -0.565421 -0.442731 \n", - "4 -0.210773 -0.237984 -0.343494 -0.295906 -0.413551 -0.289205 \n", - "\n", - "[5 rows x 26 columns]" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['readings'].head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## pandas.DataFrame.set\n", - "\n", - "* Input: readings (dataframe)\n", - "* Output: readings (dataframe with turbine_id)\n", - "* Effect: turbine_id has been set as a readings column" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "metadata": {}, - "outputs": [], - "source": [ - "step = 7\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" - ] - }, - "execution_count": 31, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
0123456789...171819202122232425turbine_id
0-0.261261-0.237069-0.208701-0.141066-0.323288-0.259694-0.421988-1.000000-1.000000-1.000000...-0.373977-0.252336-0.227166-0.201405-0.134818-0.322392-0.253801-0.418224-0.256975T001
1-0.533490-0.478056-0.490888-0.385580-0.510372-0.443400-0.554384-0.999830-0.999825-0.999825...-0.373635-0.525701-0.470726-0.482436-0.378664-0.507620-0.440936-0.553738-0.480853T001
2-0.414414-0.359718-0.346267-0.297806-0.447358-0.360360-0.484866-0.999670-0.999656-0.999656...-0.373286-0.408879-0.348946-0.337237-0.294256-0.439625-0.354386-0.483645-0.381043T001
3-0.402664-0.396160-0.428180-0.374608-0.499022-0.464943-0.567663-0.999499-0.999485-0.999494...-0.372968-0.397196-0.388759-0.421546-0.371630-0.495897-0.461988-0.565421-0.442731T001
4-0.280846-0.264107-0.184793-0.222962-0.336986-0.271837-0.354814-0.999296-0.999295-0.999301...-0.372599-0.282710-0.271663-0.210773-0.237984-0.343494-0.295906-0.413551-0.289205T001
\n", - "

5 rows × 27 columns

\n", - "
" - ], - "text/plain": [ - " 0 1 2 3 4 5 6 \\\n", - "0 -0.261261 -0.237069 -0.208701 -0.141066 -0.323288 -0.259694 -0.421988 \n", - "1 -0.533490 -0.478056 -0.490888 -0.385580 -0.510372 -0.443400 -0.554384 \n", - "2 -0.414414 -0.359718 -0.346267 -0.297806 -0.447358 -0.360360 -0.484866 \n", - "3 -0.402664 -0.396160 -0.428180 -0.374608 -0.499022 -0.464943 -0.567663 \n", - "4 -0.280846 -0.264107 -0.184793 -0.222962 -0.336986 -0.271837 -0.354814 \n", - "\n", - " 7 8 9 ... 17 18 19 20 \\\n", - "0 -1.000000 -1.000000 -1.000000 ... -0.373977 -0.252336 -0.227166 -0.201405 \n", - "1 -0.999830 -0.999825 -0.999825 ... -0.373635 -0.525701 -0.470726 -0.482436 \n", - "2 -0.999670 -0.999656 -0.999656 ... -0.373286 -0.408879 -0.348946 -0.337237 \n", - "3 -0.999499 -0.999485 -0.999494 ... -0.372968 -0.397196 -0.388759 -0.421546 \n", - "4 -0.999296 -0.999295 -0.999301 ... -0.372599 -0.282710 -0.271663 -0.210773 \n", - "\n", - " 21 22 23 24 25 turbine_id \n", - "0 -0.134818 -0.322392 -0.253801 -0.418224 -0.256975 T001 \n", - "1 -0.378664 -0.507620 -0.440936 -0.553738 -0.480853 T001 \n", - "2 -0.294256 -0.439625 -0.354386 -0.483645 -0.381043 T001 \n", - "3 -0.371630 -0.495897 -0.461988 -0.565421 -0.442731 T001 \n", - "4 -0.237984 -0.343494 -0.295906 -0.413551 -0.289205 T001 \n", - "\n", - "[5 rows x 27 columns]" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['readings'].head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## pandas.DataFrame.set\n", - "\n", - "* Input: readings (dataframe with turbine_id)\n", - "* Output: readings (dataframe with turbine_id and timestamp)\n", - "* Effect: timestamp has been set as a readings column" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "metadata": {}, - "outputs": [], - "source": [ - "step = 8\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
0123456789...1819202122232425turbine_idtimestamp
0-0.261261-0.237069-0.208701-0.141066-0.323288-0.259694-0.421988-1.000000-1.000000-1.000000...-0.252336-0.227166-0.201405-0.134818-0.322392-0.253801-0.418224-0.256975T0012013-01-10 00:00:00
1-0.533490-0.478056-0.490888-0.385580-0.510372-0.443400-0.554384-0.999830-0.999825-0.999825...-0.525701-0.470726-0.482436-0.378664-0.507620-0.440936-0.553738-0.480853T0012013-01-10 01:00:00
2-0.414414-0.359718-0.346267-0.297806-0.447358-0.360360-0.484866-0.999670-0.999656-0.999656...-0.408879-0.348946-0.337237-0.294256-0.439625-0.354386-0.483645-0.381043T0012013-01-10 02:00:00
3-0.402664-0.396160-0.428180-0.374608-0.499022-0.464943-0.567663-0.999499-0.999485-0.999494...-0.397196-0.388759-0.421546-0.371630-0.495897-0.461988-0.565421-0.442731T0012013-01-10 03:00:00
4-0.280846-0.264107-0.184793-0.222962-0.336986-0.271837-0.354814-0.999296-0.999295-0.999301...-0.282710-0.271663-0.210773-0.237984-0.343494-0.295906-0.413551-0.289205T0012013-01-10 04:00:00
\n", - "

5 rows × 28 columns

\n", - "
" - ], - "text/plain": [ - " 0 1 2 3 4 5 6 \\\n", - "0 -0.261261 -0.237069 -0.208701 -0.141066 -0.323288 -0.259694 -0.421988 \n", - "1 -0.533490 -0.478056 -0.490888 -0.385580 -0.510372 -0.443400 -0.554384 \n", - "2 -0.414414 -0.359718 -0.346267 -0.297806 -0.447358 -0.360360 -0.484866 \n", - "3 -0.402664 -0.396160 -0.428180 -0.374608 -0.499022 -0.464943 -0.567663 \n", - "4 -0.280846 -0.264107 -0.184793 -0.222962 -0.336986 -0.271837 -0.354814 \n", - "\n", - " 7 8 9 ... 18 19 20 21 \\\n", - "0 -1.000000 -1.000000 -1.000000 ... -0.252336 -0.227166 -0.201405 -0.134818 \n", - "1 -0.999830 -0.999825 -0.999825 ... -0.525701 -0.470726 -0.482436 -0.378664 \n", - "2 -0.999670 -0.999656 -0.999656 ... -0.408879 -0.348946 -0.337237 -0.294256 \n", - "3 -0.999499 -0.999485 -0.999494 ... -0.397196 -0.388759 -0.421546 -0.371630 \n", - "4 -0.999296 -0.999295 -0.999301 ... -0.282710 -0.271663 -0.210773 -0.237984 \n", - "\n", - " 22 23 24 25 turbine_id timestamp \n", - "0 -0.322392 -0.253801 -0.418224 -0.256975 T001 2013-01-10 00:00:00 \n", - "1 -0.507620 -0.440936 -0.553738 -0.480853 T001 2013-01-10 01:00:00 \n", - "2 -0.439625 -0.354386 -0.483645 -0.381043 T001 2013-01-10 02:00:00 \n", - "3 -0.495897 -0.461988 -0.565421 -0.442731 T001 2013-01-10 03:00:00 \n", - "4 -0.343494 -0.295906 -0.413551 -0.289205 T001 2013-01-10 04:00:00 \n", - "\n", - "[5 rows x 28 columns]" - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['readings'].head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences\n", - "\n", - "* Input: X, readings (dataframe with turbine_id and timestamp)\n", - "* Output: X\n", - "* Effect: X has been converted to a 3d numpy array that contains 1 matrix of shape\n", - " (window_size x num_signals) for each one of the target times." 
- ] - }, - { - "cell_type": "code", - "execution_count": 36, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'window_size': 24, 'cutoff_time': 'cutoff_time', 'time_index': 'timestamp'}" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pipeline._pipeline.get_hyperparameters()[\n", - " 'mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1']" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [], - "source": [ - "step = 9\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" - ] - }, - "execution_count": 38, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(8521, 28)" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['readings'].shape" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(353,)" - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['y'].shape" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(353, 24, 26)" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['X'].shape" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": { - "scrolled": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "array([[-0.58793576, -0.60305643, -0.63981971, -0.61481191, -0.69823875,\n", - " -0.65021543, -0.68912322, -0.99436914, -0.99439755, -0.99454249,\n", - " -0.99446788, -0.99476185, -0.99490997, -0.99529511, -0.34701493,\n", - " -0.33886256, -0.33860532, -0.36301186, -0.57943925, -0.59250585,\n", - " -0.6323185 , -0.60609613, -0.69284877, -0.64444444, -0.68691589,\n", - " -0.63853752],\n", - " [-0.56600078, -0.5846395 , -0.63002156, -0.61559561, -0.70880626,\n", - " -0.66392479, -0.69732474, -0.9942427 , -0.99427986, -0.9944408 ,\n", - " -0.99436498, -0.99468147, -0.99482011, -0.99521249, -0.33955224,\n", - " -0.31516588, -0.38892883, -0.36280656, -0.55841121, -0.57611241,\n", - " -0.62295082, -0.61078546, -0.70222743, -0.65847953, -0.69392523,\n", - " -0.63645815],\n", - " [-0.64081473, -0.64184953, -0.67038997, -0.63597179, -0.71350294,\n", - " -0.65844105, -0.66764304, -0.99412236, -0.99416864, -0.99434228,\n", - " -0.99426059, -0.99459663, -0.99472365, -0.99511795, -0.34328358,\n", - " -0.30094787, -0.36304817, -0.36259859, -0.63317757, -0.6323185 ,\n", - " -0.66042155, -0.62954279, -0.70926143, -0.65380117, -0.66588785,\n", - " -0.66002426]])" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['X'][0][:3]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## keras.Sequential.LSTMTimeSeriesClassifier\n", - "\n", - "* Input: X, y\n", - "* Output: \n", - "* Effect: LSTM has been fitted." 
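
For intuition, the sketch below is a minimal stand-in for this step. The real architecture and hyperparameters are defined by the keras.Sequential.LSTMTimeSeriesClassifier primitive; the unit count, dropout rate and training settings here are illustrative assumptions, and only the (window_size, n_signals) input shape comes from the steps above.

    from keras.layers import LSTM, Dense, Dropout
    from keras.models import Sequential

    def build_lstm_classifier(window_size=24, n_signals=26, units=80, dropout=0.3):
        # Binary classifier over fixed-length windows of scaled signal values.
        model = Sequential([
            LSTM(units, input_shape=(window_size, n_signals)),
            Dropout(dropout),
            Dense(1, activation='sigmoid'),
        ])
        model.compile(loss='binary_crossentropy', optimizer='adam',
                      metrics=['accuracy'])
        return model

    # model = build_lstm_classifier()
    # model.fit(context['X'], context['y'])
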
- ] - }, - { - "cell_type": "code", - "execution_count": 43, - "metadata": {}, - "outputs": [], - "source": [ - "step = 10\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.12" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From ffd512959df111d08a05226ea3c98abf07230414 Mon Sep 17 00:00:00 2001 From: Sarah Alnegheimish <40212131+sarahmish@users.noreply.github.com> Date: Mon, 11 Apr 2022 15:28:57 -0400 Subject: [PATCH 156/171] Update dependencies (#67) * update deps * add numpy and pandas * pin minimum for Jinja2 --- .gitignore | 1 + draco/pipeline.py | 12 ++++++++---- draco/pipelines/dummy/dummy.json | 11 +++++++++++ setup.py | 6 ++---- tests/test_pipeline.py | 13 +++++++++++++ 5 files changed, 35 insertions(+), 8 deletions(-) create mode 100644 draco/pipelines/dummy/dummy.json diff --git a/.gitignore b/.gitignore index b4e035b..25331c7 100644 --- a/.gitignore +++ b/.gitignore @@ -113,4 +113,5 @@ scripts/ dask-worker-space/ tutorials/*.pkl +*.pkl *.DS_Store diff --git a/draco/pipeline.py b/draco/pipeline.py index 98fb3d7..6a9adf6 100644 --- a/draco/pipeline.py +++ b/draco/pipeline.py @@ -9,7 +9,6 @@ from copy import deepcopy from hashlib import md5 -import cloudpickle import keras import numpy as np from btb import BTBSession @@ -612,14 +611,14 @@ def predict(self, target_times=None, readings=None, turbines=None, return predictions def save(self, path): - """Serialize and save this pipeline using cloudpickle. + """Serialize and save this pipeline using pickle. Args: path (str): Path to the file where the pipeline will be saved. """ with open(path, 'wb') as pickle_file: - cloudpickle.dump(self, pickle_file) + pickle.dump(self, pickle_file) @classmethod def load(cls, path): @@ -634,4 +633,9 @@ def load(cls, path): Loaded DracoPipeline instance. 
""" with open(path, 'rb') as pickle_file: - return cloudpickle.load(pickle_file) + pipeline = pickle.load(pickle_file) + + if not isinstance(pipeline, cls): + raise ValueError('Serialized object is not a DracoPipeline') + + return pipeline diff --git a/draco/pipelines/dummy/dummy.json b/draco/pipelines/dummy/dummy.json new file mode 100644 index 0000000..a28121e --- /dev/null +++ b/draco/pipelines/dummy/dummy.json @@ -0,0 +1,11 @@ +{ + "primitives": [ + "mlprimitives.custom.preprocessing.ClassEncoder", + "mlprimitives.custom.feature_extraction.DatetimeFeaturizer", + "mlprimitives.custom.feature_extraction.CategoricalEncoder", + "mlprimitives.custom.feature_extraction.StringVectorizer", + "sklearn.impute.SimpleImputer", + "sklearn.linear_model.LogisticRegression", + "mlprimitives.custom.preprocessing.ClassDecoder" + ] +} \ No newline at end of file diff --git a/setup.py b/setup.py index 0157b7c..5c2de23 100644 --- a/setup.py +++ b/setup.py @@ -22,15 +22,12 @@ 'pymongo>=3.7.2,<4', 'scikit-learn>=0.21', 'tqdm<4.50.0,>=4.36.1', - 'cloudpickle>=1.6,<2', 'scipy>=1.0.1,<2', - 'numpy<1.19.0,>=1.16.0', + 'numpy>=1.16.0,<1.21.0', 'pandas>=1,<2', 'partd>=1.1.0,<2', 'fsspec>=0.8.5,<0.9', 'dask>=2.6.0,<3', - 'distributed>=2.6.0,<3', - 'h5py<2.11.0,>=2.10.0', # fix tensorflow requirement 'tabulate>=0.8.3,<0.9', 'xlsxwriter>=1.3.6<1.4', ] @@ -59,6 +56,7 @@ 'sphinx_rtd_theme>=0.2.4,<0.5', 'docutils>=0.14,<0.18', 'autodocsumm>=0.1.10', + 'Jinja2>=2,<3', # style check 'flake8>=3.7.7,<4', diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index f8526c9..3b7359f 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -67,3 +67,16 @@ def test_predict(self, load_pipeline_mock, mlpipeline_mock): instance.fitted = True target_times, readings = self._get_data() instance.predict(target_times, readings) + + def test_save_load(self): + file = 'path.pkl' + + # Run + instance = DracoPipeline('dummy', 'accuracy') + instance.save(file) + new_instance = DracoPipeline.load(file) + + # Asserts + assert isinstance(new_instance, instance.__class__) + assert instance.template == new_instance.template + assert instance.fitted == new_instance.fitted From a15783cc84750ec19bbb485670bdd2e6069a66bd Mon Sep 17 00:00:00 2001 From: Sarah Alnegheimish <40212131+sarahmish@users.noreply.github.com> Date: Mon, 11 Apr 2022 17:44:03 -0400 Subject: [PATCH 157/171] Fix Doc Generation (#68) * add docs test * pin doc deps --- .github/workflows/tests.yml | 17 +++++++++++++++++ setup.py | 1 + 2 files changed, 18 insertions(+) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index d4c79b9..6d73a25 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -7,6 +7,23 @@ on: branches: [ master ] jobs: + docs: + runs-on: ${{ matrix.os }} + strategy: + matrix: + python-version: [3.8] + os: [ubuntu-latest] + steps: + - uses: actions/checkout@v1 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v1 + with: + python-version: ${{ matrix.python-version }} + - name: Install package + run: python -m pip install .[dev] + - name: make docs + run: make docs + lint: runs-on: ${{ matrix.os }} strategy: diff --git a/setup.py b/setup.py index 5c2de23..69d188e 100644 --- a/setup.py +++ b/setup.py @@ -56,6 +56,7 @@ 'sphinx_rtd_theme>=0.2.4,<0.5', 'docutils>=0.14,<0.18', 'autodocsumm>=0.1.10', + 'markupsafe<2.1.0', 'Jinja2>=2,<3', # style check From 0a678f175065e4eee01b2f5d856ddaa5a7bcdccd Mon Sep 17 00:00:00 2001 From: Sarah Alnegheimish 
<40212131+sarahmish@users.noreply.github.com> Date: Tue, 12 Apr 2022 03:25:19 -0400
Subject: [PATCH 158/171] Prepare Release v0.2.0 (#69) MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * update release
commands * Bump version: 0.1.1.dev0 → 0.1.1.dev1 * cap tensorflow * Bump version:
0.1.1.dev1 → 0.1.1.dev2 * add release notes --- HISTORY.md | 11 +++++++++++
Makefile | 8 +++++++- draco/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 3 ++-
 5 files changed, 22 insertions(+), 4 deletions(-) diff --git a/HISTORY.md b/HISTORY.md
index 84a28d5..539ca0e 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,9 +1,20 @@ # History +
+## 0.2.0 - 2022-04-12 + +This release features a reorganization and renaming of ``Draco``
pipelines. In addition, +we update some of the dependencies for general housekeeping. +
+* Update Draco dependencies - [Issue #66](https://github.com/signals-dev/Draco/issues/66) by @sarahmish
+* Reorganize pipelines - [Issue #63](https://github.com/signals-dev/Draco/issues/63) by @sarahmish +
+ ## 0.1.0 - 2022-01-01 * First release on ``draco-ml`` PyPI + ## Previous GreenGuard development
### 0.3.0 - 2021-01-22 diff --git a/Makefile b/Makefile index a6ad0e5..590d8bc 100644
--- a/Makefile +++ b/Makefile @@ -256,7 +256,7 @@ check-release: check-candidate check-clean check-master check-history ## Check i
 @echo "A new release can be made" .PHONY: release
-release: check-release bumpversion-release docker-push publish bumpversion-patch
+release: check-release bumpversion-release publish bumpversion-patch .PHONY: release-test
release-test: check-release bumpversion-release-test publish-test bumpversion-revert
@@ -267,6 +267,12 @@ release-candidate: check-master publish bumpversion-candidate
.PHONY: release-candidate-test release-candidate-test: check-clean check-master publish-test
+.PHONY: release-minor +release-minor: check-release bumpversion-minor release +
+.PHONY: release-major +release-major: check-release bumpversion-major release +
 # DOCKER TARGETS diff --git a/draco/__init__.py b/draco/__init__.py index a1a5e8a..94384c3 100644
--- a/draco/__init__.py +++ b/draco/__init__.py @@ -4,7 +4,7 @@ __author__ = """MIT Data To AI Lab"""
__email__ = 'dailabmit@gmail.com' -__version__ = '0.1.1.dev0' +__version__ = '0.1.1.dev2' import os
diff --git a/setup.cfg b/setup.cfg index 597575d..3113a93 100644 --- a/setup.cfg +++ b/setup.cfg
@@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.1.1.dev0 +current_version = 0.1.1.dev2
commit = True tag = True parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?
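The `parse` expression above is what drives each of the ``Bump version`` commits in this series: bumpversion decomposes the current version into named parts, increments one of them, and serializes the result back. A minimal sketch of that parsing step, shown only for illustration and assuming the group names in the reconstructed config above (it is not part of the patch itself):

```python3
import re

# The bumpversion `parse` expression from the setup.cfg hunk above, with
# its named groups: major/minor/patch plus an optional pre-release tag.
PARSE = (
    r'(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)'
    r'(\.(?P<release>[a-z]+)(?P<candidate>\d+))?'
)

# Decompose the pre-release version that this patch bumps to.
parts = re.match(PARSE, '0.1.1.dev2').groupdict()
print(parts)
# {'major': '0', 'minor': '1', 'patch': '1', 'release': 'dev', 'candidate': '2'}
```

A stable version such as `0.2.0` matches the same expression with `release` and `candidate` left as `None`, which is how the tool tells dev builds apart from final releases.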
diff --git a/setup.py b/setup.py index 69d188e..f73f2ab 100644 --- a/setup.py +++ b/setup.py
@@ -25,6 +25,7 @@ 'scipy>=1.0.1,<2', 'numpy>=1.16.0,<1.21.0', 'pandas>=1,<2',
+ 'tensorflow>=2,<2.3', 'partd>=1.1.0,<2', 'fsspec>=0.8.5,<0.9', 'dask>=2.6.0,<3',
@@ -114,6 +115,6 @@ test_suite='tests', tests_require=tests_require,
url='https://github.com/sintel-dev/Draco', - version='0.1.1.dev0', + version='0.1.1.dev2',
zip_safe=False, ) From 49d69e670d2f8ec36746b2cfd4f3893ffecf2065 Mon Sep 17 00:00:00 2001
From: sarahmish Date: Tue, 12 Apr 2022 03:28:18 -0400 Subject: [PATCH 159/171]
=?UTF-8?q?Bump=20version:=200.1.1.dev2=20?= =?UTF-8?q?=E2=86=92=200.2.0.dev0?=
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ---
 draco/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/draco/__init__.py b/draco/__init__.py index 94384c3..e11da10 100644
--- a/draco/__init__.py +++ b/draco/__init__.py @@ -4,7 +4,7 @@ __author__ = """MIT Data To AI Lab"""
__email__ = 'dailabmit@gmail.com' -__version__ = '0.1.1.dev2' +__version__ = '0.2.0.dev0' import os
diff --git a/setup.cfg b/setup.cfg index 3113a93..a225134 100644 --- a/setup.cfg +++ b/setup.cfg
@@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.1.1.dev2 +current_version = 0.2.0.dev0
commit = True tag = True parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?
diff --git a/setup.py b/setup.py index f73f2ab..39ba79e 100644 --- a/setup.py +++ b/setup.py
@@ -115,6 +115,6 @@ test_suite='tests', tests_require=tests_require,
url='https://github.com/sintel-dev/Draco', - version='0.1.1.dev2', + version='0.2.0.dev0',
zip_safe=False, ) From 9aa2b2203fd8c24d965c15c98d6409ead33f5ec0 Mon Sep 17 00:00:00 2001
From: sarahmish Date: Tue, 12 Apr 2022 03:28:19 -0400 Subject: [PATCH 160/171]
=?UTF-8?q?Bump=20version:=200.2.0.dev0=20?= =?UTF-8?q?=E2=86=92=200.2.0?=
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ---
 draco/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/draco/__init__.py b/draco/__init__.py index e11da10..8f1402d 100644
--- a/draco/__init__.py +++ b/draco/__init__.py @@ -4,7 +4,7 @@ __author__ = """MIT Data To AI Lab"""
__email__ = 'dailabmit@gmail.com' -__version__ = '0.2.0.dev0' +__version__ = '0.2.0' import os
diff --git a/setup.cfg b/setup.cfg index a225134..d366bb3 100644 --- a/setup.cfg +++ b/setup.cfg
@@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.2.0.dev0 +current_version = 0.2.0
commit = True tag = True parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?
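Each of the version-bump patches in this stretch touches the same three files (`draco/__init__.py`, `setup.cfg` and `setup.py`), because bumpversion simply substitutes the literal version string in every file listed in its configuration and commits the result. A rough sketch of that substitution step, using a hypothetical helper rather than the real tool (which is invoked through the Makefile targets above):

```python3
from pathlib import Path

# Hypothetical stand-in for the substitution bumpversion performs on each
# bump: swap the literal current version for the new one in every
# configured file. The real tool additionally commits and tags the change.
def bump_files(current, new,
               filenames=('draco/__init__.py', 'setup.cfg', 'setup.py')):
    for name in filenames:
        path = Path(name)
        text = path.read_text()
        if current not in text:
            raise ValueError(f'{name} does not contain version {current!r}')
        path.write_text(text.replace(current, new))

# The bumps applied by the surrounding patches would correspond to:
# bump_files('0.1.1.dev2', '0.2.0.dev0')   # PATCH 159
# bump_files('0.2.0.dev0', '0.2.0')        # PATCH 160
```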
diff --git a/setup.py b/setup.py index 39ba79e..a104b00 100644 --- a/setup.py +++ b/setup.py
@@ -115,6 +115,6 @@ test_suite='tests', tests_require=tests_require,
url='https://github.com/sintel-dev/Draco', - version='0.2.0.dev0', + version='0.2.0',
zip_safe=False, ) From fa13eb0d52e82cf86d8ec8bea5409519902da1a4 Mon Sep 17 00:00:00 2001
From: sarahmish Date: Tue, 12 Apr 2022 03:28:34 -0400 Subject: [PATCH 161/171]
=?UTF-8?q?Bump=20version:=200.2.0=20=E2=86=92=200?= =?UTF-8?q?.2.1.dev0?=
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ---
 draco/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/draco/__init__.py b/draco/__init__.py index 8f1402d..e134da2 100644
--- a/draco/__init__.py +++ b/draco/__init__.py @@ -4,7 +4,7 @@ __author__ = """MIT Data To AI Lab"""
__email__ = 'dailabmit@gmail.com' -__version__ = '0.2.0' +__version__ = '0.2.1.dev0' import os
diff --git a/setup.cfg b/setup.cfg index d366bb3..e78faaa 100644 --- a/setup.cfg +++ b/setup.cfg
@@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.2.0 +current_version = 0.2.1.dev0
commit = True tag = True parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?
diff --git a/setup.py b/setup.py index a104b00..d5deb41 100644 --- a/setup.py +++ b/setup.py
@@ -115,6 +115,6 @@ test_suite='tests', tests_require=tests_require,
url='https://github.com/sintel-dev/Draco', - version='0.2.0', + version='0.2.1.dev0',
zip_safe=False, ) From 03ab7c9f4b5e2bddff3ee4c4f26b1faf30ef7941 Mon Sep 17 00:00:00 2001
From: Sarah Alnegheimish <40212131+sarahmish@users.noreply.github.com> Date: Fri, 24 Feb 2023 17:45:05 -0600
Subject: [PATCH 162/171] Update packages and project description (#70) * remove wind industries
* pin `protobuf` * pin metadata * pin ubuntu test version * pin importlib ---
 .github/workflows/tests.yml | 52 ++++++++++++++++++------------------- README.md | 4 +--
 docs/conf.py | 2 +- setup.py | 7 +++-- 4 files changed, 34 insertions(+), 31 deletions(-)
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 6d73a25..17d140f 100644
--- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -16,7 +16,7 @@ jobs:
 steps: - uses: actions/checkout@v1 - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v1 + uses: actions/setup-python@v2 with:
 python-version: ${{ matrix.python-version }} - name: Install package @@ -29,30 +29,30 @@
 strategy: matrix: python-version: [3.6, 3.7, 3.8] - os: [ubuntu-latest] + os: [ubuntu-20.04]
 steps: - uses: actions/checkout@v1 - name: Set up Python ${{ matrix.python-version }}
- uses: actions/setup-python@v1 + uses: actions/setup-python@v2 with:
 python-version: ${{ matrix.python-version }} - name: Install dependencies run: |
 python -m pip install --upgrade pip - pip install tox tox-gh-actions - - name: Test with tox
- run: tox -e lint + pip install .[dev] + - name: make lint + run: make lint
 readme: runs-on: ${{ matrix.os }} strategy: matrix: python-version: [3.6, 3.7, 3.8]
- os: [ubuntu-latest] + os: [ubuntu-20.04] steps: - uses: actions/checkout@v1
- name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v1
+ uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }}
- name: Install libgomp1 @@ -61,63 +61,63 @@ - name: Install dependencies run: |
 python -m pip install --upgrade pip - pip install tox tox-gh-actions - - name: Test with tox
- run: tox -e readme + pip install rundoc . 
+ - name: make readme + run: make test-readme unit: runs-on: ${{ matrix.os }} strategy: matrix: python-version: [3.6, 3.7, 3.8] - os: [ubuntu-latest, macos-10.15] + os: [ubuntu-20.04, macos-10.15] steps: - uses: actions/checkout@v1 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v1 + uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip - pip install tox tox-gh-actions - - name: Test with tox - run: tox -e unit + pip install .[test] + - name: make unit + run: make test-unit minimum: runs-on: ${{ matrix.os }} strategy: matrix: python-version: [3.6, 3.7, 3.8] - os: [ubuntu-latest] + os: [ubuntu-20.04] steps: - uses: actions/checkout@v1 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v1 + uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip - pip install tox tox-gh-actions - - name: Test with tox - run: tox -e minimum + pip install .[test] + - name: make minimum + run: make test-minimum tutorials: runs-on: ${{ matrix.os }} strategy: matrix: python-version: [3.6, 3.7, 3.8] - os: [ubuntu-latest] + os: [ubuntu-20.04] steps: - uses: actions/checkout@v1 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v1 + uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip - pip install tox tox-gh-actions - - name: Test with tox - run: tox -e tutorials + pip install jupyter . + - name: make tutorials + run: make test-tutorials diff --git a/README.md b/README.md index 2d398e4..08effaa 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@

-AutoML for Renewable Energy Industries. +AutoML for Time Series.

@@ -29,7 +29,7 @@ AutoML for Renewable Energy Industries. ## Overview The Draco project is a collection of end-to-end solutions for machine learning problems -commonly found in monitoring wind energy production systems. Most tasks utilize sensor data +commonly found in time series monitoring systems. Most tasks utilize sensor data emanating from monitoring systems. We utilize the foundational innovations developed for automation of machine Learning at Data to AI Lab at MIT. diff --git a/docs/conf.py b/docs/conf.py index ecd0023..e01a346 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -63,7 +63,7 @@ title = project + ' Documentation', copyright = '2018, MIT Data To AI Lab' author = 'MIT Data To AI Lab' -description = 'AutoML for Renewable Energy Industries' +description = 'AutoML for Time Series' user = 'sintel-dev' # The version info for the project you're documenting, acts as replacement diff --git a/setup.py b/setup.py index d5deb41..0118bd0 100644 --- a/setup.py +++ b/setup.py @@ -31,6 +31,9 @@ 'dask>=2.6.0,<3', 'tabulate>=0.8.3,<0.9', 'xlsxwriter>=1.3.6<1.4', + # fix conflicts + 'protobuf<4', + 'importlib-metadata<2,>=0.12', ] setup_requires = [ @@ -67,6 +70,7 @@ # fix style issues 'autoflake>=1.1,<2', 'autopep8>=1.4.3,<2', + 'importlib-metadata<5', # distribute on PyPI 'twine>=1.10.0,<4', @@ -75,7 +79,6 @@ # Advanced testing 'coverage>=4.5.1,<6', 'tox>=2.9.1,<4', - 'importlib-metadata<2,>=0.12', ] setup( @@ -91,7 +94,7 @@ 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', ], - description='AutoML for Renewable Energy Industries.', + description='AutoML for Time Series.', entry_points={ 'mlblocks': [ 'pipelines=draco:MLBLOCKS_PIPELINES', From 854391fbff6d8334ddd221bf248dca77e9656859 Mon Sep 17 00:00:00 2001 From: Sarah Alnegheimish <40212131+sarahmish@users.noreply.github.com> Date: Thu, 13 Apr 2023 16:25:17 -0400 Subject: [PATCH 163/171] Remove dfs pipelines (#74) --- README.md | 13 +- draco/pipelines/dfs_xgb/dfs_xgb.json | 29 - ...fs_xgb_prob_with_double_normalization.json | 42 - .../dfs_xgb/dfs_xgb_prob_with_unstack.json | 50 - ...s_xgb_prob_with_unstack_normalization.json | 49 - .../dfs_xgb_with_double_normalization.json | 37 - .../dfs_xgb/dfs_xgb_with_normalization.json | 29 - .../dfs_xgb/dfs_xgb_with_unstack.json | 45 - .../dfs_xgb_with_unstack_normalization.json | 44 - .../double_lstm_prob_with_unstack.json | 37 +- .../double_lstm/double_lstm_with_unstack.json | 37 +- .../lstm/lstm_prob_with_unstack.json | 37 +- draco/pipelines/lstm/lstm_with_unstack.json | 37 +- .../lstm_regressor_with_unstack.json | 39 +- .../double_entity_normalization.json | 51 - .../preprocessing/entity_dataframe.json | 26 - .../preprocessing/entity_normalization.json | 20 - draco/pipelines/preprocessing/unstack.json | 43 - setup.py | 7 +- tests/test_benchmark.py | 2 +- tutorials/01_Draco_Machine_Learning.ipynb | 236 ++- tutorials/03_Benchmarking.ipynb | 253 ++- tutorials/04_Draco_Regression_Pipeline.ipynb | 81 +- .../dfs_xgb_with_double_normalization.ipynb | 1363 -------------- .../dfs_xgb_with_unstack_normalization.ipynb | 1611 ----------------- 25 files changed, 419 insertions(+), 3799 deletions(-) delete mode 100644 draco/pipelines/dfs_xgb/dfs_xgb.json delete mode 100644 draco/pipelines/dfs_xgb/dfs_xgb_prob_with_double_normalization.json delete mode 100644 draco/pipelines/dfs_xgb/dfs_xgb_prob_with_unstack.json delete mode 100644 draco/pipelines/dfs_xgb/dfs_xgb_prob_with_unstack_normalization.json delete mode 100644 
draco/pipelines/dfs_xgb/dfs_xgb_with_double_normalization.json delete mode 100644 draco/pipelines/dfs_xgb/dfs_xgb_with_normalization.json delete mode 100644 draco/pipelines/dfs_xgb/dfs_xgb_with_unstack.json delete mode 100644 draco/pipelines/dfs_xgb/dfs_xgb_with_unstack_normalization.json delete mode 100644 draco/pipelines/preprocessing/double_entity_normalization.json delete mode 100644 draco/pipelines/preprocessing/entity_dataframe.json delete mode 100644 draco/pipelines/preprocessing/entity_normalization.json delete mode 100644 draco/pipelines/preprocessing/unstack.json delete mode 100644 tutorials/pipelines/dfs_xgb_with_double_normalization.ipynb delete mode 100644 tutorials/pipelines/dfs_xgb_with_unstack_normalization.ipynb diff --git a/README.md b/README.md index 08effaa..365c442 100644 --- a/README.md +++ b/README.md @@ -220,18 +220,17 @@ The returned `pipeline` variable will be `list` containing the names of all the available in the Draco system: ``` -['dfs_xgb', - 'dfs_xgb_with_unstack', - 'dfs_xgb_with_normalization', - 'dfs_xgb_with_unstack_normalization', - 'dfs_xgb_prob_with_unstack_normalization'] +['lstm', + 'lstm_with_unstack', + 'double_lstm', + 'double_lstm_with_unstack'] ``` For the rest of this tutorial, we will select and use the pipeline -`dfs_xgb_with_unstack_normalization` as our template. +`lstm_with_unstack` as our template. ```python3 -pipeline_name = 'dfs_xgb_with_unstack_normalization' +pipeline_name = 'lstm_with_unstack' ``` ## 3. Fitting the Pipeline diff --git a/draco/pipelines/dfs_xgb/dfs_xgb.json b/draco/pipelines/dfs_xgb/dfs_xgb.json deleted file mode 100644 index 4cb3cbf..0000000 --- a/draco/pipelines/dfs_xgb/dfs_xgb.json +++ /dev/null @@ -1,29 +0,0 @@ -{ - "primitives": [ - "mlblocks.MLPipeline", - "featuretools.dfs", - "mlprimitives.custom.feature_extraction.CategoricalEncoder", - "xgboost.XGBClassifier" - ], - "init_params": { - "mlblocks.MLPipeline#1": { - "pipeline": "preprocessing.entity_dataframe" - }, - "featuretools.dfs#1": { - "target_entity": "turbines", - "index": "turbine_id", - "time_index": "cutoff_time", - "encode": false, - "max_depth": -1, - "copy": true, - "verbose": false, - "n_jobs": 1, - "training_window": "1d" - } - }, - "input_names": { - "mlblocks.MLPipeline#1": { - "dataframe": "readings" - } - } -} diff --git a/draco/pipelines/dfs_xgb/dfs_xgb_prob_with_double_normalization.json b/draco/pipelines/dfs_xgb/dfs_xgb_prob_with_double_normalization.json deleted file mode 100644 index 4231115..0000000 --- a/draco/pipelines/dfs_xgb/dfs_xgb_prob_with_double_normalization.json +++ /dev/null @@ -1,42 +0,0 @@ -{ - "primitives": [ - "mlblocks.MLPipeline", - "featuretools.dfs", - "mlprimitives.custom.feature_extraction.CategoricalEncoder", - "xgboost.XGBClassifier:probabilities", - "numpy.take" - ], - "init_params": { - "mlblocks.MLPipeline#1": { - "pipeline": "preprocessing.double_entity_normalization", - "input_names": { - "pandas.DataFrame.resample#1": { - "X": "df" - }, - "featuretools.EntitySet.entity_from_dataframe#1": { - "dataframe": "readings" - } - } - }, - "featuretools.dfs#1": { - "target_entity": "turbines", - "index": "turbine_id", - "time_index": "cutoff_time", - "encode": false, - "max_depth": -1, - "copy": true, - "verbose": false, - "n_jobs": 1, - "training_window": "1d" - }, - "numpy.take#1": { - "indices": 1, - "axis": 1 - } - }, - "input_names": { - "mlblocks.MLPipeline#1": { - "X": "readings" - } - } -} diff --git a/draco/pipelines/dfs_xgb/dfs_xgb_prob_with_unstack.json 
b/draco/pipelines/dfs_xgb/dfs_xgb_prob_with_unstack.json deleted file mode 100644 index 03ef141..0000000 --- a/draco/pipelines/dfs_xgb/dfs_xgb_prob_with_unstack.json +++ /dev/null @@ -1,50 +0,0 @@ -{ - "primitives": [ - "mlblocks.MLPipeline", - "mlblocks.MLPipeline", - "featuretools.dfs", - "mlprimitives.custom.feature_extraction.CategoricalEncoder", - "xgboost.XGBClassifier:probabilities", - "numpy.take" - ], - "init_params": { - "mlblocks.MLPipeline#1": { - "pipeline": "preprocessing.unstack", - "input_names": { - "pandas.DataFrame.resample#1": { - "X": "df" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - } - } - }, - "mlblocks.MLPipeline#2": { - "pipeline": "preprocessing.entity_dataframe" - }, - "featuretools.dfs#1": { - "target_entity": "turbines", - "index": "turbine_id", - "time_index": "cutoff_time", - "encode": false, - "max_depth": -1, - "copy": true, - "verbose": false, - "n_jobs": 1, - "training_window": "1d" - }, - "numpy.take#1": { - "indices": 1, - "axis": 1 - } - }, - "input_names": { - "mlblocks.MLPipeline#1": { - "X": "readings" - }, - "mlblocks.MLPipeline#2": { - "dataframe": "readings", - "turbines": "turbines" - } - } -} diff --git a/draco/pipelines/dfs_xgb/dfs_xgb_prob_with_unstack_normalization.json b/draco/pipelines/dfs_xgb/dfs_xgb_prob_with_unstack_normalization.json deleted file mode 100644 index ca0c4fa..0000000 --- a/draco/pipelines/dfs_xgb/dfs_xgb_prob_with_unstack_normalization.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "primitives": [ - "mlblocks.MLPipeline", - "mlblocks.MLPipeline", - "featuretools.dfs", - "mlprimitives.custom.feature_extraction.CategoricalEncoder", - "xgboost.XGBClassifier:probabilities", - "numpy.take" - ], - "init_params": { - "mlblocks.MLPipeline#1": { - "pipeline": "preprocessing.unstack", - "input_names": { - "pandas.DataFrame.resample#1": { - "X": "df" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - } - } - }, - "mlblocks.MLPipeline#2": { - "pipeline": "preprocessing.entity_normalization" - }, - "featuretools.dfs#1": { - "target_entity": "turbines", - "index": "turbine_id", - "time_index": "cutoff_time", - "encode": false, - "max_depth": -1, - "copy": true, - "verbose": false, - "n_jobs": 1, - "training_window": "1d" - }, - "numpy.take#1": { - "indices": 1, - "axis": 1 - } - }, - "input_names": { - "mlblocks.MLPipeline#1": { - "X": "readings" - }, - "mlblocks.MLPipeline#2": { - "dataframe": "readings" - } - } -} diff --git a/draco/pipelines/dfs_xgb/dfs_xgb_with_double_normalization.json b/draco/pipelines/dfs_xgb/dfs_xgb_with_double_normalization.json deleted file mode 100644 index 82ae325..0000000 --- a/draco/pipelines/dfs_xgb/dfs_xgb_with_double_normalization.json +++ /dev/null @@ -1,37 +0,0 @@ -{ - "primitives": [ - "mlblocks.MLPipeline", - "featuretools.dfs", - "mlprimitives.custom.feature_extraction.CategoricalEncoder", - "xgboost.XGBClassifier" - ], - "init_params": { - "mlblocks.MLPipeline#1": { - "pipeline": "preprocessing.double_entity_normalization", - "input_names": { - "pandas.DataFrame.resample#1": { - "X": "df" - }, - "featuretools.EntitySet.entity_from_dataframe#1": { - "dataframe": "readings" - } - } - }, - "featuretools.dfs#1": { - "target_entity": "turbines", - "index": "turbine_id", - "time_index": "cutoff_time", - "encode": false, - "max_depth": -1, - "copy": true, - "verbose": false, - "n_jobs": 1, - "training_window": "1d" - } - }, - "input_names": { - "mlblocks.MLPipeline#1": { - "X": "readings" - } - } -} diff --git a/draco/pipelines/dfs_xgb/dfs_xgb_with_normalization.json 
b/draco/pipelines/dfs_xgb/dfs_xgb_with_normalization.json deleted file mode 100644 index d9d7911..0000000 --- a/draco/pipelines/dfs_xgb/dfs_xgb_with_normalization.json +++ /dev/null @@ -1,29 +0,0 @@ -{ - "primitives": [ - "mlblocks.MLPipeline", - "featuretools.dfs", - "mlprimitives.custom.feature_extraction.CategoricalEncoder", - "xgboost.XGBClassifier" - ], - "init_params": { - "mlblocks.MLPipeline#1": { - "pipeline": "preprocessing.entity_normalization" - }, - "featuretools.dfs#1": { - "target_entity": "turbines", - "index": "turbine_id", - "time_index": "cutoff_time", - "encode": false, - "max_depth": -1, - "copy": true, - "verbose": false, - "n_jobs": 1, - "training_window": "1d" - } - }, - "input_names": { - "mlblocks.MLPipeline#1": { - "dataframe": "readings" - } - } -} diff --git a/draco/pipelines/dfs_xgb/dfs_xgb_with_unstack.json b/draco/pipelines/dfs_xgb/dfs_xgb_with_unstack.json deleted file mode 100644 index dd01f23..0000000 --- a/draco/pipelines/dfs_xgb/dfs_xgb_with_unstack.json +++ /dev/null @@ -1,45 +0,0 @@ -{ - "primitives": [ - "mlblocks.MLPipeline", - "mlblocks.MLPipeline", - "featuretools.dfs", - "mlprimitives.custom.feature_extraction.CategoricalEncoder", - "xgboost.XGBClassifier" - ], - "init_params": { - "mlblocks.MLPipeline#1": { - "pipeline": "preprocessing.unstack", - "input_names": { - "pandas.DataFrame.resample#1": { - "X": "df" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - } - } - }, - "mlblocks.MLPipeline#2": { - "pipeline": "preprocessing.entity_dataframe" - }, - "featuretools.dfs#1": { - "target_entity": "turbines", - "index": "turbine_id", - "time_index": "cutoff_time", - "encode": false, - "max_depth": -1, - "copy": true, - "verbose": false, - "n_jobs": 1, - "training_window": "1d" - } - }, - "input_names": { - "mlblocks.MLPipeline#1": { - "X": "readings" - }, - "mlblocks.MLPipeline#2": { - "dataframe": "readings", - "turbines": "turbines" - } - } -} diff --git a/draco/pipelines/dfs_xgb/dfs_xgb_with_unstack_normalization.json b/draco/pipelines/dfs_xgb/dfs_xgb_with_unstack_normalization.json deleted file mode 100644 index 87e6999..0000000 --- a/draco/pipelines/dfs_xgb/dfs_xgb_with_unstack_normalization.json +++ /dev/null @@ -1,44 +0,0 @@ -{ - "primitives": [ - "mlblocks.MLPipeline", - "mlblocks.MLPipeline", - "featuretools.dfs", - "mlprimitives.custom.feature_extraction.CategoricalEncoder", - "xgboost.XGBClassifier" - ], - "init_params": { - "mlblocks.MLPipeline#1": { - "pipeline": "preprocessing.unstack", - "input_names": { - "pandas.DataFrame.resample#1": { - "X": "df" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - } - } - }, - "mlblocks.MLPipeline#2": { - "pipeline": "preprocessing.entity_normalization" - }, - "featuretools.dfs#1": { - "target_entity": "turbines", - "index": "turbine_id", - "time_index": "cutoff_time", - "encode": false, - "max_depth": -1, - "copy": true, - "verbose": false, - "n_jobs": 1, - "training_window": "1d" - } - }, - "input_names": { - "mlblocks.MLPipeline#1": { - "X": "readings" - }, - "mlblocks.MLPipeline#2": { - "dataframe": "readings" - } - } -} diff --git a/draco/pipelines/double_lstm/double_lstm_prob_with_unstack.json b/draco/pipelines/double_lstm/double_lstm_prob_with_unstack.json index 289a794..ea48a87 100644 --- a/draco/pipelines/double_lstm/double_lstm_prob_with_unstack.json +++ b/draco/pipelines/double_lstm/double_lstm_prob_with_unstack.json @@ -1,6 +1,7 @@ { "primitives": [ - "mlblocks.MLPipeline", + "pandas.DataFrame.resample", + "pandas.DataFrame.unstack", "pandas.DataFrame.pop", 
"pandas.DataFrame.pop", "sklearn.impute.SimpleImputer", @@ -13,16 +14,19 @@ "numpy.take" ], "init_params": { - "mlblocks.MLPipeline#1": { - "pipeline": "unstack", - "input_names": { - "pandas.DataFrame.resample#1": { - "X": "df" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - } - } + "pandas.DataFrame.resample#1": { + "rule": "3600s", + "on": "timestamp", + "groupby": [ + "turbine_id", + "signal_id" + ], + "aggregation": "mean", + "reset_index": false + }, + "pandas.DataFrame.unstack#1": { + "level": "signal_id", + "reset_index": true }, "pandas.DataFrame.pop#1": { "item": "turbine_id" @@ -63,7 +67,10 @@ } }, "input_names": { - "mlblocks.MLPipeline#1": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { "X": "readings" }, "pandas.DataFrame.pop#1": { @@ -94,6 +101,12 @@ } }, "output_names": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + }, "pandas.DataFrame.pop#1": { "item": "turbine_id" }, diff --git a/draco/pipelines/double_lstm/double_lstm_with_unstack.json b/draco/pipelines/double_lstm/double_lstm_with_unstack.json index 1d08259..dede502 100644 --- a/draco/pipelines/double_lstm/double_lstm_with_unstack.json +++ b/draco/pipelines/double_lstm/double_lstm_with_unstack.json @@ -1,6 +1,7 @@ { "primitives": [ - "mlblocks.MLPipeline", + "pandas.DataFrame.resample", + "pandas.DataFrame.unstack", "pandas.DataFrame.pop", "pandas.DataFrame.pop", "sklearn.impute.SimpleImputer", @@ -12,16 +13,19 @@ "keras.Sequential.DoubleLSTMTimeSeriesClassifier" ], "init_params": { - "mlblocks.MLPipeline#1": { - "pipeline": "unstack", - "input_names": { - "pandas.DataFrame.resample#1": { - "X": "df" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - } - } + "pandas.DataFrame.resample#1": { + "rule": "3600s", + "on": "timestamp", + "groupby": [ + "turbine_id", + "signal_id" + ], + "aggregation": "mean", + "reset_index": false + }, + "pandas.DataFrame.unstack#1": { + "level": "signal_id", + "reset_index": true }, "pandas.DataFrame.pop#1": { "item": "turbine_id" @@ -56,7 +60,10 @@ } }, "input_names": { - "mlblocks.MLPipeline#1": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { "X": "readings" }, "pandas.DataFrame.pop#1": { @@ -87,6 +94,12 @@ } }, "output_names": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + }, "pandas.DataFrame.pop#1": { "item": "turbine_id" }, diff --git a/draco/pipelines/lstm/lstm_prob_with_unstack.json b/draco/pipelines/lstm/lstm_prob_with_unstack.json index 1ad69bc..9272257 100644 --- a/draco/pipelines/lstm/lstm_prob_with_unstack.json +++ b/draco/pipelines/lstm/lstm_prob_with_unstack.json @@ -1,6 +1,7 @@ { "primitives": [ - "mlblocks.MLPipeline", + "pandas.DataFrame.resample", + "pandas.DataFrame.unstack", "pandas.DataFrame.pop", "pandas.DataFrame.pop", "sklearn.impute.SimpleImputer", @@ -13,16 +14,19 @@ "numpy.take" ], "init_params": { - "mlblocks.MLPipeline#1": { - "pipeline": "unstack", - "input_names": { - "pandas.DataFrame.resample#1": { - "X": "df" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - } - } + "pandas.DataFrame.resample#1": { + "rule": "3600s", + "on": "timestamp", + "groupby": [ + "turbine_id", + "signal_id" + ], + "aggregation": "mean", + "reset_index": false + }, + "pandas.DataFrame.unstack#1": { + "level": "signal_id", + "reset_index": true }, "pandas.DataFrame.pop#1": { "item": "turbine_id" @@ -63,7 +67,10 @@ } }, "input_names": 
{ - "mlblocks.MLPipeline#1": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { "X": "readings" }, "pandas.DataFrame.pop#1": { @@ -94,6 +101,12 @@ } }, "output_names": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + }, "pandas.DataFrame.pop#1": { "item": "turbine_id" }, diff --git a/draco/pipelines/lstm/lstm_with_unstack.json b/draco/pipelines/lstm/lstm_with_unstack.json index 18c486a..ab9dd99 100644 --- a/draco/pipelines/lstm/lstm_with_unstack.json +++ b/draco/pipelines/lstm/lstm_with_unstack.json @@ -1,6 +1,7 @@ { "primitives": [ - "mlblocks.MLPipeline", + "pandas.DataFrame.resample", + "pandas.DataFrame.unstack", "pandas.DataFrame.pop", "pandas.DataFrame.pop", "sklearn.impute.SimpleImputer", @@ -12,16 +13,19 @@ "keras.Sequential.LSTMTimeSeriesClassifier" ], "init_params": { - "mlblocks.MLPipeline#1": { - "pipeline": "unstack", - "input_names": { - "pandas.DataFrame.resample#1": { - "X": "df" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - } - } + "pandas.DataFrame.resample#1": { + "rule": "3600s", + "on": "timestamp", + "groupby": [ + "turbine_id", + "signal_id" + ], + "aggregation": "mean", + "reset_index": false + }, + "pandas.DataFrame.unstack#1": { + "level": "signal_id", + "reset_index": true }, "pandas.DataFrame.pop#1": { "item": "turbine_id" @@ -56,7 +60,10 @@ } }, "input_names": { - "mlblocks.MLPipeline#1": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { "X": "readings" }, "pandas.DataFrame.pop#1": { @@ -87,6 +94,12 @@ } }, "output_names": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + }, "pandas.DataFrame.pop#1": { "item": "turbine_id" }, diff --git a/draco/pipelines/lstm_regressor/lstm_regressor_with_unstack.json b/draco/pipelines/lstm_regressor/lstm_regressor_with_unstack.json index d546827..9e183b9 100644 --- a/draco/pipelines/lstm_regressor/lstm_regressor_with_unstack.json +++ b/draco/pipelines/lstm_regressor/lstm_regressor_with_unstack.json @@ -1,6 +1,7 @@ { "primitives": [ - "mlblocks.MLPipeline", + "pandas.DataFrame.resample", + "pandas.DataFrame.unstack", "pandas.DataFrame.pop", "pandas.DataFrame.pop", "sklearn.impute.SimpleImputer", @@ -12,16 +13,19 @@ "keras.Sequential.LSTMTimeSeriesRegressor" ], "init_params": { - "mlblocks.MLPipeline#1": { - "pipeline": "unstack", - "input_names": { - "pandas.DataFrame.resample#1": { - "X": "df" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - } - } + "pandas.DataFrame.resample#1": { + "rule": "600s", + "on": "timestamp", + "groupby": [ + "turbine_id", + "signal_id" + ], + "aggregation": "mean", + "reset_index": false + }, + "pandas.DataFrame.unstack#1": { + "level": "signal_id", + "reset_index": true }, "pandas.DataFrame.pop#1": { "item": "turbine_id" @@ -52,11 +56,14 @@ }, "keras.Sequential.LSTMTimeSeriesRegressor#1": { "epochs": 35, - "verbose": false + "verbose": true } }, "input_names": { - "mlblocks.MLPipeline#1": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { "X": "readings" }, "pandas.DataFrame.pop#1": { @@ -87,6 +94,12 @@ } }, "output_names": { + "pandas.DataFrame.resample#1": { + "X": "readings" + }, + "pandas.DataFrame.unstack#1": { + "X": "readings" + }, "pandas.DataFrame.pop#1": { "item": "turbine_id" }, diff --git a/draco/pipelines/preprocessing/double_entity_normalization.json 
b/draco/pipelines/preprocessing/double_entity_normalization.json deleted file mode 100644 index 1438bbe..0000000 --- a/draco/pipelines/preprocessing/double_entity_normalization.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "primitives": [ - "pandas.DataFrame.resample", - "featuretools.EntitySet.entity_from_dataframe", - "featuretools.EntitySet.normalize_entity", - "featuretools.EntitySet.normalize_entity" - ], - "init_params": { - "pandas.DataFrame.resample#1": { - "rule": "600s", - "on": "timestamp", - "groupby": [ - "turbine_id", - "signal_id" - ], - "aggregation": "mean", - "reset_index": true - }, - "featuretools.EntitySet.entity_from_dataframe#1": { - "entity_id": "readings", - "index": "reading_id", - "make_index": true, - "time_index": "timestamp" - }, - "featuretools.EntitySet.normalize_entity#1": { - "base_entity_id": "readings", - "new_entity_id": "turbines", - "index": "turbine_id", - "make_time_index": false - }, - "featuretools.EntitySet.normalize_entity#2": { - "base_entity_id": "readings", - "new_entity_id": "signals", - "index": "signal_id", - "make_time_index": false - } - }, - "input_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "featuretools.EntitySet.entity_from_dataframe#1": { - "dataframe": "readings" - } - }, - "output_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - } - } -} diff --git a/draco/pipelines/preprocessing/entity_dataframe.json b/draco/pipelines/preprocessing/entity_dataframe.json deleted file mode 100644 index 0bd238a..0000000 --- a/draco/pipelines/preprocessing/entity_dataframe.json +++ /dev/null @@ -1,26 +0,0 @@ -{ - "primitives": [ - "featuretools.EntitySet.entity_from_dataframe", - "featuretools.EntitySet.entity_from_dataframe", - "featuretools.EntitySet.add_relationship" - ], - "init_params": { - "featuretools.EntitySet.entity_from_dataframe#1": { - "entity_id": "readings", - "index": "reading_id", - "make_index": true, - "time_index": "timestamp" - }, - "featuretools.EntitySet.entity_from_dataframe#2": { - "entity_id": "turbines", - "index": "turbine_id", - "make_index": false - }, - "featuretools.EntitySet.add_relationship#1": { - "parent": "turbines", - "parent_column": "turbine_id", - "child": "readings", - "child_column": "turbine_id" - } - } -} diff --git a/draco/pipelines/preprocessing/entity_normalization.json b/draco/pipelines/preprocessing/entity_normalization.json deleted file mode 100644 index 9f3f3ab..0000000 --- a/draco/pipelines/preprocessing/entity_normalization.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "primitives": [ - "featuretools.EntitySet.entity_from_dataframe", - "featuretools.EntitySet.normalize_entity" - ], - "init_params": { - "featuretools.EntitySet.entity_from_dataframe#1": { - "entity_id": "readings", - "index": "reading_id", - "make_index": true, - "time_index": "timestamp" - }, - "featuretools.EntitySet.normalize_entity#1": { - "base_entity_id": "readings", - "new_entity_id": "turbines", - "index": "turbine_id", - "make_time_index": false - } - } -} diff --git a/draco/pipelines/preprocessing/unstack.json b/draco/pipelines/preprocessing/unstack.json deleted file mode 100644 index 1acd833..0000000 --- a/draco/pipelines/preprocessing/unstack.json +++ /dev/null @@ -1,43 +0,0 @@ -{ - "primitives": [ - "pandas.DataFrame.resample", - "pandas.DataFrame.unstack" - ], - "init_params": { - "pandas.DataFrame.resample#1": { - "rule": "600s", - "on": "timestamp", - "groupby": [ - "turbine_id", - "signal_id" - ], - "aggregation": "mean", - "reset_index": false - }, - "pandas.DataFrame.unstack#1": { - 
"level": "signal_id", - "reset_index": true - } - }, - "input_names": { - "pandas.DataFrame.unstack#1": { - "X": "readings" - } - }, - "output_names": { - "pandas.DataFrame.resample#1": { - "X": "readings" - }, - "pandas.DataFrame.unstack#1": { - "X": "readings" - } - }, - "outputs": { - "default": [ - { - "name": "readings", - "variable": "pandas.DataFrame.unstack#1.readings" - } - ] - } -} diff --git a/setup.py b/setup.py index 0118bd0..1dfcc4b 100644 --- a/setup.py +++ b/setup.py @@ -20,10 +20,10 @@ 'mlprimitives>=0.3.2,<0.4', 'mlblocks>=0.4.0,<0.5', 'pymongo>=3.7.2,<4', - 'scikit-learn>=0.21', + 'scikit-learn>=0.21,<1.2', 'tqdm<4.50.0,>=4.36.1', 'scipy>=1.0.1,<2', - 'numpy>=1.16.0,<1.21.0', + 'numpy>=1.16.0,<1.19', 'pandas>=1,<2', 'tensorflow>=2,<2.3', 'partd>=1.1.0,<2', @@ -33,7 +33,8 @@ 'xlsxwriter>=1.3.6<1.4', # fix conflicts 'protobuf<4', - 'importlib-metadata<2,>=0.12', + 'importlib-metadata<5', + #'importlib-metadata<2,>=0.12', ] setup_requires = [ diff --git a/tests/test_benchmark.py b/tests/test_benchmark.py index d88425b..60736b0 100644 --- a/tests/test_benchmark.py +++ b/tests/test_benchmark.py @@ -8,7 +8,7 @@ def test_predict(): # setup templates = [ - 'dfs_xgb_prob_with_unstack_normalization' + 'lstm_with_unstack' ] window_size_rule = [ diff --git a/tutorials/01_Draco_Machine_Learning.ipynb b/tutorials/01_Draco_Machine_Learning.ipynb index 6b1089e..8138127 100644 --- a/tutorials/01_Draco_Machine_Learning.ipynb +++ b/tutorials/01_Draco_Machine_Learning.ipynb @@ -414,14 +414,9 @@ { "data": { "text/plain": [ - "['dfs_xgb_prob_with_unstack',\n", - " 'dfs_xgb_with_normalization',\n", - " 'dfs_xgb',\n", - " 'dfs_xgb_with_unstack',\n", - " 'dfs_xgb_prob_with_unstack_normalization',\n", - " 'dfs_xgb_with_unstack_normalization',\n", - " 'dfs_xgb_prob_with_double_normalization',\n", - " 'dfs_xgb_with_double_normalization',\n", + "['dummy',\n", + " 'lstm_regressor_with_unstack',\n", + " 'lstm_regressor',\n", " 'double_lstm_prob_with_unstack',\n", " 'double_lstm_prob',\n", " 'double_lstm',\n", @@ -458,14 +453,16 @@ { "data": { "text/plain": [ - "['dfs_xgb_prob_with_unstack',\n", - " 'dfs_xgb_with_normalization',\n", - " 'dfs_xgb',\n", - " 'dfs_xgb_with_unstack',\n", - " 'dfs_xgb_prob_with_unstack_normalization',\n", - " 'dfs_xgb_with_unstack_normalization',\n", - " 'dfs_xgb_prob_with_double_normalization',\n", - " 'dfs_xgb_with_double_normalization']" + "['lstm_regressor_with_unstack',\n", + " 'lstm_regressor',\n", + " 'double_lstm_prob_with_unstack',\n", + " 'double_lstm_prob',\n", + " 'double_lstm',\n", + " 'double_lstm_with_unstack',\n", + " 'lstm_prob_with_unstack',\n", + " 'lstm_with_unstack',\n", + " 'lstm_prob',\n", + " 'lstm']" ] }, "execution_count": 11, @@ -474,7 +471,7 @@ } ], "source": [ - "get_pipelines('dfs')" + "get_pipelines('lstm')" ] }, { @@ -493,14 +490,16 @@ { "data": { "text/plain": [ - "{'dfs_xgb_prob_with_unstack': '/Users/sarah/opt/anaconda3/envs/draco/lib/python3.7/site-packages/draco/pipelines/dfs_xgb/dfs_xgb_prob_with_unstack.json',\n", - " 'dfs_xgb_with_normalization': '/Users/sarah/opt/anaconda3/envs/draco/lib/python3.7/site-packages/draco/pipelines/dfs_xgb/dfs_xgb_with_normalization.json',\n", - " 'dfs_xgb': '/Users/sarah/opt/anaconda3/envs/draco/lib/python3.7/site-packages/draco/pipelines/dfs_xgb/dfs_xgb.json',\n", - " 'dfs_xgb_with_unstack': '/Users/sarah/opt/anaconda3/envs/draco/lib/python3.7/site-packages/draco/pipelines/dfs_xgb/dfs_xgb_with_unstack.json',\n", - " 'dfs_xgb_prob_with_unstack_normalization': 
'/Users/sarah/opt/anaconda3/envs/draco/lib/python3.7/site-packages/draco/pipelines/dfs_xgb/dfs_xgb_prob_with_unstack_normalization.json',\n", - " 'dfs_xgb_with_unstack_normalization': '/Users/sarah/opt/anaconda3/envs/draco/lib/python3.7/site-packages/draco/pipelines/dfs_xgb/dfs_xgb_with_unstack_normalization.json',\n", - " 'dfs_xgb_prob_with_double_normalization': '/Users/sarah/opt/anaconda3/envs/draco/lib/python3.7/site-packages/draco/pipelines/dfs_xgb/dfs_xgb_prob_with_double_normalization.json',\n", - " 'dfs_xgb_with_double_normalization': '/Users/sarah/opt/anaconda3/envs/draco/lib/python3.7/site-packages/draco/pipelines/dfs_xgb/dfs_xgb_with_double_normalization.json'}" + "{'lstm_regressor_with_unstack': '/Users/sarah/anaconda3/envs/draco/lib/python3.8/site-packages/draco/pipelines/lstm_regressor/lstm_regressor_with_unstack.json',\n", + " 'lstm_regressor': '/Users/sarah/anaconda3/envs/draco/lib/python3.8/site-packages/draco/pipelines/lstm_regressor/lstm_regressor.json',\n", + " 'double_lstm_prob_with_unstack': '/Users/sarah/anaconda3/envs/draco/lib/python3.8/site-packages/draco/pipelines/double_lstm/double_lstm_prob_with_unstack.json',\n", + " 'double_lstm_prob': '/Users/sarah/anaconda3/envs/draco/lib/python3.8/site-packages/draco/pipelines/double_lstm/double_lstm_prob.json',\n", + " 'double_lstm': '/Users/sarah/anaconda3/envs/draco/lib/python3.8/site-packages/draco/pipelines/double_lstm/double_lstm.json',\n", + " 'double_lstm_with_unstack': '/Users/sarah/anaconda3/envs/draco/lib/python3.8/site-packages/draco/pipelines/double_lstm/double_lstm_with_unstack.json',\n", + " 'lstm_prob_with_unstack': '/Users/sarah/anaconda3/envs/draco/lib/python3.8/site-packages/draco/pipelines/lstm/lstm_prob_with_unstack.json',\n", + " 'lstm_with_unstack': '/Users/sarah/anaconda3/envs/draco/lib/python3.8/site-packages/draco/pipelines/lstm/lstm_with_unstack.json',\n", + " 'lstm_prob': '/Users/sarah/anaconda3/envs/draco/lib/python3.8/site-packages/draco/pipelines/lstm/lstm_prob.json',\n", + " 'lstm': '/Users/sarah/anaconda3/envs/draco/lib/python3.8/site-packages/draco/pipelines/lstm/lstm.json'}" ] }, "execution_count": 12, @@ -509,7 +508,7 @@ } ], "source": [ - "get_pipelines('dfs', path=True)" + "get_pipelines('lstm', path=True)" ] }, { @@ -539,8 +538,8 @@ "outputs": [], "source": [ "templates = [\n", - " 'dfs_xgb_with_unstack_normalization', \n", - " 'dfs_xgb_with_double_normalization'\n", + " 'lstm_with_unstack', \n", + " 'double_lstm_with_unstack'\n", "]" ] }, @@ -613,56 +612,43 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:btb.session:Obtaining default configuration for dfs_xgb_with_unstack_normalization\n", - "INFO:draco.pipeline:New configuration found:\n", - " Template: dfs_xgb_with_unstack_normalization \n", - " Hyperparameters: \n", - " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 0\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 100\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 3\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.1\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.0\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 1\n", - "INFO:btb.session:New optimal found: dfs_xgb_with_unstack_normalization - 0.6117760013143775\n", - "INFO:btb.session:Obtaining default configuration for dfs_xgb_with_double_normalization\n", - "INFO:btb.session:Generating new proposal configuration for dfs_xgb_with_unstack_normalization\n", + "INFO:btb.session:Obtaining default configuration for lstm_with_unstack\n", + "2023-04-07 
14:17:30.569247: I tensorflow/core/platform/cpu_feature_guard.cc:143] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA\n", + "2023-04-07 14:17:30.594310: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fa3c50cbbb0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n", + "2023-04-07 14:17:30.594323: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version\n", "INFO:draco.pipeline:New configuration found:\n", - " Template: dfs_xgb_with_unstack_normalization \n", + " Template: lstm_with_unstack \n", " Hyperparameters: \n", - " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 90\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 342\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 6\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.9043352048331922\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.5258350872963311\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 9\n", - "INFO:btb.session:New optimal found: dfs_xgb_with_unstack_normalization - 0.6205571445297473\n", - "INFO:btb.session:Generating new proposal configuration for dfs_xgb_with_double_normalization\n", + " ('sklearn.impute.SimpleImputer#1', 'strategy'): mean\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'lstm_1_units'): 80\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.3\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dense_1_units'): 80\n", + "INFO:btb.session:New optimal found: lstm_with_unstack - 0.6292647327130085\n", + "INFO:btb.session:Obtaining default configuration for double_lstm_with_unstack\n", "INFO:draco.pipeline:New configuration found:\n", - " Template: dfs_xgb_with_double_normalization \n", + " Template: double_lstm_with_unstack \n", " Hyperparameters: \n", - " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 80\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 66\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 10\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.6434375682152088\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.14135407511034503\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10\n", - "INFO:btb.session:New optimal found: dfs_xgb_with_double_normalization - 0.629513025867624\n", - "INFO:btb.session:Generating new proposal configuration for dfs_xgb_with_double_normalization\n" + " ('sklearn.impute.SimpleImputer#1', 'strategy'): mean\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'lstm_1_units'): 80\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.3\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'lstm_2_units'): 80\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'dropout_2_rate'): 0.3\n", + "INFO:btb.session:New optimal found: double_lstm_with_unstack - 0.6434978589136803\n", + "INFO:btb.session:Generating new proposal configuration for double_lstm_with_unstack\n" ] }, { "data": { "text/plain": [ - "{'id': '452a22a136f67c575aee3341c9dc2395',\n", - " 'name': 'dfs_xgb_with_double_normalization',\n", - " 'config': {('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 80,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 66,\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 10,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.6434375682152088,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 
0.14135407511034503,\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10},\n", - " 'score': 0.629513025867624}" + "{'id': '2904e940a6e56612e275f93afc00c6e6',\n", + " 'name': 'double_lstm_with_unstack',\n", + " 'config': {('sklearn.impute.SimpleImputer#1', 'strategy'): 'mean',\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'lstm_1_units'): 80,\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.3,\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'lstm_2_units'): 80,\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1',\n", + " 'dropout_2_rate'): 0.3},\n", + " 'score': 0.6434978589136803}" ] }, "execution_count": 16, @@ -671,7 +657,7 @@ } ], "source": [ - "session.run(5)" + "session.run(3)" ] }, { @@ -690,16 +676,15 @@ { "data": { "text/plain": [ - "{'id': '452a22a136f67c575aee3341c9dc2395',\n", - " 'name': 'dfs_xgb_with_double_normalization',\n", - " 'config': {('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 80,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 66,\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 10,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.6434375682152088,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.14135407511034503,\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10},\n", - " 'score': 0.629513025867624}" + "{'id': '2904e940a6e56612e275f93afc00c6e6',\n", + " 'name': 'double_lstm_with_unstack',\n", + " 'config': {('sklearn.impute.SimpleImputer#1', 'strategy'): 'mean',\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'lstm_1_units'): 80,\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.3,\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'lstm_2_units'): 80,\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1',\n", + " 'dropout_2_rate'): 0.3},\n", + " 'score': 0.6434978589136803}" ] }, "execution_count": 17, @@ -726,13 +711,11 @@ { "data": { "text/plain": [ - "{('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 80,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 66,\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 10,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.6434375682152088,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.14135407511034503,\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10}" + "{('sklearn.impute.SimpleImputer#1', 'strategy'): 'mean',\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'lstm_1_units'): 80,\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.3,\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'lstm_2_units'): 80,\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'dropout_2_rate'): 0.3}" ] }, "execution_count": 18, @@ -759,7 +742,7 @@ { "data": { "text/plain": [ - "'dfs_xgb_with_double_normalization'" + "'double_lstm_with_unstack'" ] }, "execution_count": 19, @@ -787,7 +770,7 @@ { "data": { "text/plain": [ - "0.629513025867624" + "0.6434978589136803" ] }, "execution_count": 20, @@ -817,41 +800,40 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:btb.session:Generating new proposal configuration for dfs_xgb_with_unstack_normalization\n", - "INFO:btb.session:Generating new proposal configuration for dfs_xgb_with_unstack_normalization\n", - "INFO:btb.session:Generating new proposal configuration for dfs_xgb_with_double_normalization\n", - "INFO:btb.session:Generating new proposal 
configuration for dfs_xgb_with_double_normalization\n", - "INFO:btb.session:Generating new proposal configuration for dfs_xgb_with_unstack_normalization\n", - "INFO:btb.session:Generating new proposal configuration for dfs_xgb_with_unstack_normalization\n", - "INFO:btb.session:Generating new proposal configuration for dfs_xgb_with_double_normalization\n", - "INFO:btb.session:Generating new proposal configuration for dfs_xgb_with_unstack_normalization\n", + "INFO:btb.session:Generating new proposal configuration for lstm_with_unstack\n", + "INFO:btb.session:Generating new proposal configuration for lstm_with_unstack\n", + "INFO:btb.session:Generating new proposal configuration for double_lstm_with_unstack\n", "INFO:draco.pipeline:New configuration found:\n", - " Template: dfs_xgb_with_unstack_normalization \n", + " Template: double_lstm_with_unstack \n", " Hyperparameters: \n", - " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 48\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 130\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 8\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.7437898568465957\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.9963350624783064\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10\n", - "INFO:btb.session:New optimal found: dfs_xgb_with_unstack_normalization - 0.651642052400304\n", - "INFO:btb.session:Generating new proposal configuration for dfs_xgb_with_double_normalization\n", - "INFO:btb.session:Generating new proposal configuration for dfs_xgb_with_unstack_normalization\n" + " ('sklearn.impute.SimpleImputer#1', 'strategy'): constant\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'lstm_1_units'): 224\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.31140813814002105\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'lstm_2_units'): 268\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'dropout_2_rate'): 0.607924752631197\n", + "INFO:btb.session:New optimal found: double_lstm_with_unstack - 0.654373123123123\n", + "INFO:btb.session:Generating new proposal configuration for lstm_with_unstack\n", + "INFO:btb.session:Generating new proposal configuration for double_lstm_with_unstack\n", + "INFO:btb.session:Generating new proposal configuration for lstm_with_unstack\n", + "INFO:btb.session:Generating new proposal configuration for double_lstm_with_unstack\n", + "INFO:btb.session:Generating new proposal configuration for lstm_with_unstack\n", + "INFO:btb.session:Generating new proposal configuration for double_lstm_with_unstack\n", + "INFO:btb.session:Generating new proposal configuration for lstm_with_unstack\n" ] }, { "data": { "text/plain": [ - "{'id': '22ec731234212508b7b4413ccce34294',\n", - " 'name': 'dfs_xgb_with_unstack_normalization',\n", - " 'config': {('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 48,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 130,\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 8,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.7437898568465957,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.9963350624783064,\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10},\n", - " 'score': 0.651642052400304}" + "{'id': 'a34709538eddbb67637d57d48237d69d',\n", + " 'name': 'double_lstm_with_unstack',\n", + " 'config': {('sklearn.impute.SimpleImputer#1', 'strategy'): 'constant',\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'lstm_1_units'): 
224,\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1',\n", + " 'dropout_1_rate'): 0.31140813814002105,\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'lstm_2_units'): 268,\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1',\n", + " 'dropout_2_rate'): 0.607924752631197},\n", + " 'score': 0.654373123123123}" ] }, "execution_count": 21, @@ -871,7 +853,7 @@ { "data": { "text/plain": [ - "0.651642052400304" + "0.654373123123123" ] }, "execution_count": 22, @@ -891,13 +873,13 @@ { "data": { "text/plain": [ - "{('mlprimitives.custom.feature_extraction.CategoricalEncoder#1',\n", - " 'max_labels'): 48,\n", - " ('xgboost.XGBClassifier#1', 'n_estimators'): 130,\n", - " ('xgboost.XGBClassifier#1', 'max_depth'): 8,\n", - " ('xgboost.XGBClassifier#1', 'learning_rate'): 0.7437898568465957,\n", - " ('xgboost.XGBClassifier#1', 'gamma'): 0.9963350624783064,\n", - " ('xgboost.XGBClassifier#1', 'min_child_weight'): 10}" + "{('sklearn.impute.SimpleImputer#1', 'strategy'): 'constant',\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'lstm_1_units'): 224,\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1',\n", + " 'dropout_1_rate'): 0.31140813814002105,\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'lstm_2_units'): 268,\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1',\n", + " 'dropout_2_rate'): 0.607924752631197}" ] }, "execution_count": 23, @@ -964,7 +946,7 @@ { "data": { "text/plain": [ - "0.608695652173913" + "0.6521739130434783" ] }, "execution_count": 26, @@ -1067,7 +1049,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.11" + "version": "3.8.16" } }, "nbformat": 4, diff --git a/tutorials/03_Benchmarking.ipynb b/tutorials/03_Benchmarking.ipynb index 12c5e47..f22224a 100644 --- a/tutorials/03_Benchmarking.ipynb +++ b/tutorials/03_Benchmarking.ipynb @@ -77,7 +77,7 @@ "source": [ "templates = [\n", " 'lstm_prob_with_unstack',\n", - " 'dfs_xgb_prob_with_double_normalization'\n", + " 'double_lstm_prob_with_unstack'\n", "]\n", "window_size_rule = [('1d', '1h'), ('2d', '2h')]\n", "init_params = {\n", @@ -85,6 +85,11 @@ " 'keras.Sequential.LSTMTimeSeriesClassifier#1': {\n", " 'epochs': 1,\n", " }\n", + " },\n", + " 'double_lstm_prob_with_unstack': {\n", + " 'keras.Sequential.DoubleLSTMTimeSeriesClassifier#1': {\n", + " 'epochs': 1,\n", + " }\n", " }\n", "}\n" ] @@ -100,90 +105,58 @@ "name": "stderr", "output_type": "stream", "text": [ - "INFO:draco.benchmark:Evaluating template probability.unstack_lstm_timeseries_classifier on problem None (1d, 1h)\n", + "INFO:draco.benchmark:Evaluating template lstm_prob_with_unstack on problem None (1d, 1h)\n", + "2023-04-07 14:33:33.017625: I tensorflow/core/platform/cpu_feature_guard.cc:143] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA\n", + "2023-04-07 14:33:33.043631: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fc3e937a8e0 initialized for platform Host (this does not guarantee that XLA will be used). 
Devices:\n", + "2023-04-07 14:33:33.043643: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version\n", "INFO:draco.pipeline:New configuration found:\n", - " Template: probability.unstack_lstm_timeseries_classifier \n", + " Template: lstm_prob_with_unstack \n", " Hyperparameters: \n", " ('sklearn.impute.SimpleImputer#1', 'strategy'): mean\n", " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'lstm_1_units'): 80\n", " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.3\n", " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dense_1_units'): 80\n", + "INFO:draco.benchmark:Evaluating template lstm_prob_with_unstack on problem None (2d, 2h)\n", "INFO:draco.pipeline:New configuration found:\n", - " Template: probability.unstack_lstm_timeseries_classifier \n", - " Hyperparameters: \n", - " ('sklearn.impute.SimpleImputer#1', 'strategy'): constant\n", - " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'lstm_1_units'): 287\n", - " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.565737233372491\n", - " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dense_1_units'): 145\n", - "INFO:draco.pipeline:New configuration found:\n", - " Template: probability.unstack_lstm_timeseries_classifier \n", - " Hyperparameters: \n", - " ('sklearn.impute.SimpleImputer#1', 'strategy'): constant\n", - " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'lstm_1_units'): 269\n", - " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.5973752345055594\n", - " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dense_1_units'): 412\n", - "INFO:draco.benchmark:Evaluating template probability.unstack_lstm_timeseries_classifier on problem None (2d, 2h)\n", - "INFO:draco.pipeline:New configuration found:\n", - " Template: probability.unstack_lstm_timeseries_classifier \n", + " Template: lstm_prob_with_unstack \n", " Hyperparameters: \n", " ('sklearn.impute.SimpleImputer#1', 'strategy'): mean\n", " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'lstm_1_units'): 80\n", " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.3\n", " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dense_1_units'): 80\n", "INFO:draco.pipeline:New configuration found:\n", - " Template: probability.unstack_lstm_timeseries_classifier \n", + " Template: lstm_prob_with_unstack \n", " Hyperparameters: \n", - " ('sklearn.impute.SimpleImputer#1', 'strategy'): mean\n", - " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'lstm_1_units'): 114\n", - " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.016427744327526084\n", - " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dense_1_units'): 224\n", - "INFO:draco.benchmark:Evaluating template probability.normalize_dfs_xgb_classifier on problem None (1d, 1h)\n", - "INFO:draco.pipeline:New configuration found:\n", - " Template: probability.normalize_dfs_xgb_classifier \n", - " Hyperparameters: \n", - " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 0\n", - " ('xgboost.XGBClassifier:probabilities#1', 'n_estimators'): 100\n", - " ('xgboost.XGBClassifier:probabilities#1', 'max_depth'): 3\n", - " ('xgboost.XGBClassifier:probabilities#1', 'learning_rate'): 0.1\n", - " ('xgboost.XGBClassifier:probabilities#1', 'gamma'): 0.0\n", - " ('xgboost.XGBClassifier:probabilities#1', 'min_child_weight'): 1\n", - "INFO:draco.pipeline:New configuration found:\n", - " Template: probability.normalize_dfs_xgb_classifier \n", - " Hyperparameters: \n", - " 
('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 11\n", - " ('xgboost.XGBClassifier:probabilities#1', 'n_estimators'): 231\n", - " ('xgboost.XGBClassifier:probabilities#1', 'max_depth'): 9\n", - " ('xgboost.XGBClassifier:probabilities#1', 'learning_rate'): 0.554989010368875\n", - " ('xgboost.XGBClassifier:probabilities#1', 'gamma'): 0.909957492053926\n", - " ('xgboost.XGBClassifier:probabilities#1', 'min_child_weight'): 7\n", + " ('sklearn.impute.SimpleImputer#1', 'strategy'): median\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'lstm_1_units'): 137\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.612475373625103\n", + " ('keras.Sequential.LSTMTimeSeriesClassifier#1', 'dense_1_units'): 191\n", + "INFO:draco.benchmark:Evaluating template double_lstm_prob_with_unstack on problem None (1d, 1h)\n", "INFO:draco.pipeline:New configuration found:\n", - " Template: probability.normalize_dfs_xgb_classifier \n", + " Template: double_lstm_prob_with_unstack \n", " Hyperparameters: \n", - " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 61\n", - " ('xgboost.XGBClassifier:probabilities#1', 'n_estimators'): 122\n", - " ('xgboost.XGBClassifier:probabilities#1', 'max_depth'): 5\n", - " ('xgboost.XGBClassifier:probabilities#1', 'learning_rate'): 0.6840927016151666\n", - " ('xgboost.XGBClassifier:probabilities#1', 'gamma'): 0.5480298094360865\n", - " ('xgboost.XGBClassifier:probabilities#1', 'min_child_weight'): 6\n", - "INFO:draco.benchmark:Evaluating template probability.normalize_dfs_xgb_classifier on problem None (2d, 2h)\n", + " ('sklearn.impute.SimpleImputer#1', 'strategy'): mean\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'lstm_1_units'): 80\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.3\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'lstm_2_units'): 80\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'dropout_2_rate'): 0.3\n", "INFO:draco.pipeline:New configuration found:\n", - " Template: probability.normalize_dfs_xgb_classifier \n", + " Template: double_lstm_prob_with_unstack \n", " Hyperparameters: \n", - " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 0\n", - " ('xgboost.XGBClassifier:probabilities#1', 'n_estimators'): 100\n", - " ('xgboost.XGBClassifier:probabilities#1', 'max_depth'): 3\n", - " ('xgboost.XGBClassifier:probabilities#1', 'learning_rate'): 0.1\n", - " ('xgboost.XGBClassifier:probabilities#1', 'gamma'): 0.0\n", - " ('xgboost.XGBClassifier:probabilities#1', 'min_child_weight'): 1\n", + " ('sklearn.impute.SimpleImputer#1', 'strategy'): constant\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'lstm_1_units'): 245\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.4308586778212253\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'lstm_2_units'): 221\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'dropout_2_rate'): 0.5926391753395145\n", + "INFO:draco.benchmark:Evaluating template double_lstm_prob_with_unstack on problem None (2d, 2h)\n", "INFO:draco.pipeline:New configuration found:\n", - " Template: probability.normalize_dfs_xgb_classifier \n", + " Template: double_lstm_prob_with_unstack \n", " Hyperparameters: \n", - " ('mlprimitives.custom.feature_extraction.CategoricalEncoder#1', 'max_labels'): 99\n", - " ('xgboost.XGBClassifier:probabilities#1', 'n_estimators'): 616\n", - " 
('xgboost.XGBClassifier:probabilities#1', 'max_depth'): 8\n", - " ('xgboost.XGBClassifier:probabilities#1', 'learning_rate'): 0.0700166745838724\n", - " ('xgboost.XGBClassifier:probabilities#1', 'gamma'): 0.40990340522001234\n", - " ('xgboost.XGBClassifier:probabilities#1', 'min_child_weight'): 10\n" + " ('sklearn.impute.SimpleImputer#1', 'strategy'): mean\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'lstm_1_units'): 80\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'dropout_1_rate'): 0.3\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'lstm_2_units'): 80\n", + " ('keras.Sequential.DoubleLSTMTimeSeriesClassifier#1', 'dropout_2_rate'): 0.3\n" ] } ], @@ -252,20 +225,20 @@ " None\n", " 1d\n", " 1h\n", - " probability.unstack_lstm_timeseries_classifier\n", - " 0.350122\n", - " 0.538316\n", - " 0.618558\n", - " 0.463675\n", + " lstm_prob_with_unstack\n", + " 0.494505\n", + " 0.589905\n", + " 0.589905\n", + " 0.322650\n", " roc_auc_score\n", " {'threshold': 0.5}\n", - " 0 days 00:00:04.250012\n", - " 0 days 00:00:14.374875\n", - " 0 days 00:00:15.360015\n", - " 0 days 00:01:10.806375\n", + " 0 days 00:00:03.873157\n", + " 0 days 00:00:14.369536\n", + " 0 days 00:00:08.178422\n", + " 0 days 00:00:47.144655\n", " OK\n", - " 0.640449\n", - " 0.058824\n", + " 0.280899\n", + " 0.255814\n", " 1.0\n", " 0.0\n", " \n", @@ -274,20 +247,20 @@ " None\n", " 2d\n", " 2h\n", - " probability.unstack_lstm_timeseries_classifier\n", - " 0.686203\n", - " 0.491949\n", - " 0.556803\n", - " 0.510989\n", + " lstm_prob_with_unstack\n", + " 0.446581\n", + " 0.543056\n", + " 0.561570\n", + " 0.707875\n", " roc_auc_score\n", " {'threshold': 0.5}\n", - " 0 days 00:00:04.410682\n", - " 0 days 00:00:14.411205\n", - " 0 days 00:00:10.633619\n", - " 0 days 00:00:55.011304\n", + " 0 days 00:00:03.460467\n", + " 0 days 00:00:12.121905\n", + " 0 days 00:00:08.275919\n", + " 0 days 00:00:44.449291\n", " OK\n", - " 0.595506\n", - " 0.307692\n", + " 0.730337\n", + " 0.586207\n", " 1.0\n", " 0.0\n", " \n", @@ -296,20 +269,20 @@ " None\n", " 1d\n", " 1h\n", - " probability.normalize_dfs_xgb_classifier\n", - " 0.697802\n", - " 0.669508\n", - " 0.701792\n", - " 0.766789\n", + " double_lstm_prob_with_unstack\n", + " 0.813187\n", + " 0.307993\n", + " 0.592696\n", + " 0.417582\n", " roc_auc_score\n", " {'threshold': 0.5}\n", - " 0 days 00:01:11.416859\n", - " 0 days 00:02:55.012078\n", - " 0 days 00:00:00.806430\n", - " 0 days 00:05:20.653100\n", + " 0 days 00:00:05.460985\n", + " 0 days 00:00:18.103660\n", + " 0 days 00:00:14.011877\n", + " 0 days 00:01:11.192546\n", " OK\n", - " 0.797753\n", - " 0.666667\n", + " 0.303371\n", + " 0.367347\n", " 1.0\n", " 0.0\n", " \n", @@ -318,20 +291,20 @@ " None\n", " 2d\n", " 2h\n", - " probability.normalize_dfs_xgb_classifier\n", - " 0.720391\n", - " 0.718617\n", - " 0.740664\n", - " 0.782662\n", + " double_lstm_prob_with_unstack\n", + " 0.245726\n", + " 0.663919\n", + " 0.663919\n", + " 0.293346\n", " roc_auc_score\n", " {'threshold': 0.5}\n", - " 0 days 00:01:03.612676\n", - " 0 days 00:02:26.925796\n", - " 0 days 00:00:00.755424\n", - " 0 days 00:04:37.570182\n", + " 0 days 00:00:05.568835\n", + " 0 days 00:00:17.948361\n", + " 0 days 00:00:14.003816\n", + " 0 days 00:01:11.051792\n", " OK\n", - " 0.820225\n", - " 0.692308\n", + " 0.303371\n", + " 0.184211\n", " 1.0\n", " 0.0\n", " \n", @@ -340,41 +313,41 @@ "" ], "text/plain": [ - " problem_name window_size resample_rule \\\n", - "0 None 1d 1h \n", - "1 None 2d 2h \n", - "2 None 1d 1h \n", - "3 None 2d 
2h \n", + " problem_name window_size resample_rule template \\\n", + "0 None 1d 1h lstm_prob_with_unstack \n", + "1 None 2d 2h lstm_prob_with_unstack \n", + "2 None 1d 1h double_lstm_prob_with_unstack \n", + "3 None 2d 2h double_lstm_prob_with_unstack \n", "\n", - " template default_test default_cv \\\n", - "0 probability.unstack_lstm_timeseries_classifier 0.350122 0.538316 \n", - "1 probability.unstack_lstm_timeseries_classifier 0.686203 0.491949 \n", - "2 probability.normalize_dfs_xgb_classifier 0.697802 0.669508 \n", - "3 probability.normalize_dfs_xgb_classifier 0.720391 0.718617 \n", + " default_test default_cv tuned_cv tuned_test tuning_metric \\\n", + "0 0.494505 0.589905 0.589905 0.322650 roc_auc_score \n", + "1 0.446581 0.543056 0.561570 0.707875 roc_auc_score \n", + "2 0.813187 0.307993 0.592696 0.417582 roc_auc_score \n", + "3 0.245726 0.663919 0.663919 0.293346 roc_auc_score \n", "\n", - " tuned_cv tuned_test tuning_metric tuning_metric_kwargs \\\n", - "0 0.618558 0.463675 roc_auc_score {'threshold': 0.5} \n", - "1 0.556803 0.510989 roc_auc_score {'threshold': 0.5} \n", - "2 0.701792 0.766789 roc_auc_score {'threshold': 0.5} \n", - "3 0.740664 0.782662 roc_auc_score {'threshold': 0.5} \n", + " tuning_metric_kwargs fit_predict_time default_cv_time \\\n", + "0 {'threshold': 0.5} 0 days 00:00:03.873157 0 days 00:00:14.369536 \n", + "1 {'threshold': 0.5} 0 days 00:00:03.460467 0 days 00:00:12.121905 \n", + "2 {'threshold': 0.5} 0 days 00:00:05.460985 0 days 00:00:18.103660 \n", + "3 {'threshold': 0.5} 0 days 00:00:05.568835 0 days 00:00:17.948361 \n", "\n", - " fit_predict_time default_cv_time average_cv_time \\\n", - "0 0 days 00:00:04.250012 0 days 00:00:14.374875 0 days 00:00:15.360015 \n", - "1 0 days 00:00:04.410682 0 days 00:00:14.411205 0 days 00:00:10.633619 \n", - "2 0 days 00:01:11.416859 0 days 00:02:55.012078 0 days 00:00:00.806430 \n", - "3 0 days 00:01:03.612676 0 days 00:02:26.925796 0 days 00:00:00.755424 \n", + " average_cv_time total_time status \\\n", + "0 0 days 00:00:08.178422 0 days 00:00:47.144655 OK \n", + "1 0 days 00:00:08.275919 0 days 00:00:44.449291 OK \n", + "2 0 days 00:00:14.011877 0 days 00:01:11.192546 OK \n", + "3 0 days 00:00:14.003816 0 days 00:01:11.051792 OK \n", "\n", - " total_time status accuracy_threshold/0.5 f1_threshold/0.5 \\\n", - "0 0 days 00:01:10.806375 OK 0.640449 0.058824 \n", - "1 0 days 00:00:55.011304 OK 0.595506 0.307692 \n", - "2 0 days 00:05:20.653100 OK 0.797753 0.666667 \n", - "3 0 days 00:04:37.570182 OK 0.820225 0.692308 \n", + " accuracy_threshold/0.5 f1_threshold/0.5 fpr_threshold/0.5 \\\n", + "0 0.280899 0.255814 1.0 \n", + "1 0.730337 0.586207 1.0 \n", + "2 0.303371 0.367347 1.0 \n", + "3 0.303371 0.184211 1.0 \n", "\n", - " fpr_threshold/0.5 tpr_threshold/0.5 \n", - "0 1.0 0.0 \n", - "1 1.0 0.0 \n", - "2 1.0 0.0 \n", - "3 1.0 0.0 " + " tpr_threshold/0.5 \n", + "0 0.0 \n", + "1 0.0 \n", + "2 0.0 \n", + "3 0.0 " ] }, "execution_count": 4, @@ -403,7 +376,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.11" + "version": "3.8.16" } }, "nbformat": 4, diff --git a/tutorials/04_Draco_Regression_Pipeline.ipynb b/tutorials/04_Draco_Regression_Pipeline.ipynb index 709c839..4d241ca 100644 --- a/tutorials/04_Draco_Regression_Pipeline.ipynb +++ b/tutorials/04_Draco_Regression_Pipeline.ipynb @@ -515,14 +515,7 @@ { "data": { "text/plain": [ - "['dfs_xgb_prob_with_unstack',\n", - " 'dfs_xgb_with_normalization',\n", - " 'dfs_xgb',\n", - " 'dfs_xgb_with_unstack',\n", - " 
'dfs_xgb_prob_with_unstack_normalization',\n", - " 'dfs_xgb_with_unstack_normalization',\n", - " 'dfs_xgb_prob_with_double_normalization',\n", - " 'dfs_xgb_with_double_normalization',\n", + "['dummy',\n", " 'lstm_regressor_with_unstack',\n", " 'lstm_regressor',\n", " 'double_lstm_prob_with_unstack',\n", @@ -615,7 +608,13 @@ "source": [ "from draco.pipeline import DracoPipeline\n", "\n", - "pipeline = DracoPipeline(pipeline_name)" + "init_params = {\n", + " \"keras.Sequential.LSTMTimeSeriesRegressor#1\": {\n", + " \"epochs\": 10\n", + " }\n", + "}\n", + "\n", + "pipeline = DracoPipeline(pipeline_name, init_params=init_params)" ] }, { @@ -634,10 +633,35 @@ "name": "stderr", "output_type": "stream", "text": [ - "2022-02-01 15:05:13.365367: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2022-02-01 15:05:13.379993: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fe6a0ec50a0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n", - "2022-02-01 15:05:13.380010: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version\n" + "2023-04-07 16:46:35.571262: I tensorflow/core/platform/cpu_feature_guard.cc:143] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA\n", + "2023-04-07 16:46:35.594871: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7ff23c392800 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n", + "2023-04-07 16:46:35.594885: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/10\n", + "227/227 [==============================] - 6s 28ms/step - loss: 9064.8613 - mse: 9064.8613 - val_loss: 11566.7559 - val_mse: 11566.7559\n", + "Epoch 2/10\n", + "227/227 [==============================] - 6s 27ms/step - loss: 6775.8911 - mse: 6775.8911 - val_loss: 9392.9561 - val_mse: 9392.9561\n", + "Epoch 3/10\n", + "227/227 [==============================] - 6s 27ms/step - loss: 5391.6719 - mse: 5391.6719 - val_loss: 7923.1221 - val_mse: 7923.1221\n", + "Epoch 4/10\n", + "227/227 [==============================] - 6s 28ms/step - loss: 4524.3457 - mse: 4524.3457 - val_loss: 6955.8647 - val_mse: 6955.8647\n", + "Epoch 5/10\n", + "227/227 [==============================] - 7s 33ms/step - loss: 4040.5396 - mse: 4040.5396 - val_loss: 6356.0605 - val_mse: 6356.0605\n", + "Epoch 6/10\n", + "227/227 [==============================] - 6s 28ms/step - loss: 3802.5298 - mse: 3802.5298 - val_loss: 5998.2061 - val_mse: 5998.2061\n", + "Epoch 7/10\n", + "227/227 [==============================] - 7s 30ms/step - loss: 3683.9429 - mse: 3683.9429 - val_loss: 5790.9092 - val_mse: 5790.9092\n", + "Epoch 8/10\n", + "227/227 [==============================] - 7s 33ms/step - loss: 3636.9177 - mse: 3636.9177 - val_loss: 5674.6558 - val_mse: 5674.6558\n", + "Epoch 9/10\n", + "227/227 [==============================] - 7s 30ms/step - loss: 3609.4973 - mse: 3609.4973 - val_loss: 5619.3926 - val_mse: 5619.3926\n", + "Epoch 10/10\n", + "227/227 [==============================] - 7s 29ms/step - loss: 3617.7119 - mse: 3617.7119 - val_loss: 
5587.2671 - val_mse: 5587.2671\n" ] } ], @@ -658,7 +682,15 @@ "cell_type": "code", "execution_count": 17, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2/2 [==============================] - 0s 3ms/step\n" + ] + } + ], "source": [ "predictions = pipeline.predict(test_target_times, readings)" ] @@ -678,7 +710,7 @@ { "data": { "text/plain": [ - "0.6362969806460871" + "-0.1533211964451806" ] }, "execution_count": 18, @@ -748,14 +780,21 @@ "execution_count": 21, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "2/2 [==============================] - 0s 5ms/step\n" + ] + }, { "data": { "text/plain": [ - "array([[129.89064 ],\n", - " [139.89001 ],\n", - " [ 39.425865],\n", - " [110.67838 ],\n", - " [ 98.52903 ]], dtype=float32)" + "array([[91.7917 ],\n", + " [91.791695],\n", + " [91.79166 ],\n", + " [91.79167 ],\n", + " [91.79167 ]], dtype=float32)" ] }, "execution_count": 21, @@ -785,7 +824,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.11" + "version": "3.8.16" } }, "nbformat": 4, diff --git a/tutorials/pipelines/dfs_xgb_with_double_normalization.ipynb b/tutorials/pipelines/dfs_xgb_with_double_normalization.ipynb deleted file mode 100644 index 6fd5f1e..0000000 --- a/tutorials/pipelines/dfs_xgb_with_double_normalization.ipynb +++ /dev/null @@ -1,1363 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# dfs_xgb_with_double_normalization" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "from draco.demo import load_demo\n", - "\n", - "target_times, readings = load_demo()" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "pipeline_name = 'dfs_xgb_with_double_normalization'" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/sarah/opt/anaconda3/envs/draco/lib/python3.7/site-packages/dask/dataframe/utils.py:14: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.\n", - " import pandas.util.testing as tm\n" - ] - } - ], - "source": [ - "from draco.pipeline import DracoPipeline\n", - "\n", - "pipeline = DracoPipeline(pipeline_name)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['mlblocks.MLPipeline',\n", - " 'featuretools.dfs',\n", - " 'mlprimitives.custom.feature_extraction.CategoricalEncoder',\n", - " 'xgboost.XGBClassifier']" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pipeline.template['primitives']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Step by Step execution" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Input Data" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
turbine_idsignal_idtimestampvalue
0T001S012013-01-10323.0
1T001S022013-01-10320.0
2T001S032013-01-10284.0
3T001S042013-01-10348.0
4T001S052013-01-10273.0
\n", - "
" - ], - "text/plain": [ - " turbine_id signal_id timestamp value\n", - "0 T001 S01 2013-01-10 323.0\n", - "1 T001 S02 2013-01-10 320.0\n", - "2 T001 S03 2013-01-10 284.0\n", - "3 T001 S04 2013-01-10 348.0\n", - "4 T001 S05 2013-01-10 273.0" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "readings.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
turbine_idcutoff_timetarget
0T0012013-01-120
1T0012013-01-130
2T0012013-01-140
3T0012013-01-151
4T0012013-01-160
\n", - "
" - ], - "text/plain": [ - " turbine_id cutoff_time target\n", - "0 T001 2013-01-12 0\n", - "1 T001 2013-01-13 0\n", - "2 T001 2013-01-14 0\n", - "3 T001 2013-01-15 1\n", - "4 T001 2013-01-16 0" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "target_times.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Data Preparation (part of Draco Pipeline)\n", - "\n", - "* Input: target_times, readings, turbines\n", - "* Output: X, y, readings, turbines\n", - "* Effect: target_times has been split into X and y" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## mlblocks.MLPipeline\n", - "\n", - "### pandas.DataFrame.resample\n", - "\n", - "* Input: readings\n", - "* Output: readings (resampled)\n", - "* Effect: readings have been resampled to the indicated resample rule and turbine_id,\n", - " signal_id and timestamp have been set as a multi-index\n", - "\n", - "### featuretools.EntitySet.entity_from_dataframe\n", - "\n", - "* Input: readings (resampled)\n", - "* Output: entityset\n", - "* Effect: Entityset has been generated from readings\n", - "\n", - "### featuretools.EntitySet.normalize_entity\n", - "\n", - "* Input: entityset\n", - "* Output: entityset with relationship (readings.turbine_id with turbines.turbine_id)\n", - "* Effect: establish relation between readings and turbines\n", - "\n", - "### featuretools.EntitySet.normalize_entity\n", - "\n", - "* Input: entityset\n", - "* Output: entityset with relationship (readings.signal_id with signals.signal_id)\n", - "* Effect: establish relationship between readings and signals" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "step = 0\n", - "context = pipeline.fit(target_times, readings, output_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'X', 'y', 'entityset'])" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Entityset: entityset\n", - " Entities:\n", - " readings [Rows: 1329146, Columns: 5]\n", - " turbines [Rows: 1, Columns: 1]\n", - " signals [Rows: 26, Columns: 1]\n", - " Relationships:\n", - " readings.turbine_id -> turbines.turbine_id\n", - " readings.signal_id -> signals.signal_id" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['entityset']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## featuretools.dfs\n", - "\n", - "* Input: entityset (unstacked, no turbine_id, no timestamp)\n", - "* Output: X (has additional features)\n", - "* Effect: build features for relational dataset using DFS" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "step = 1\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'entityset', 'X', 'y'])" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": 
"code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
SUM(readings.value)STD(readings.value)MAX(readings.value)SKEW(readings.value)MIN(readings.value)MEAN(readings.value)COUNT(readings)NUM_UNIQUE(readings.signal_id)MODE(readings.signal_id)NUM_UNIQUE(readings.DAY(timestamp))...MEAN(readings.signals.NUM_UNIQUE(readings.WEEKDAY(timestamp)))MEAN(readings.signals.NUM_UNIQUE(readings.YEAR(timestamp)))NUM_UNIQUE(readings.signals.MODE(readings.DAY(timestamp)))NUM_UNIQUE(readings.signals.MODE(readings.YEAR(timestamp)))NUM_UNIQUE(readings.signals.MODE(readings.WEEKDAY(timestamp)))NUM_UNIQUE(readings.signals.MODE(readings.MONTH(timestamp)))MODE(readings.signals.MODE(readings.DAY(timestamp)))MODE(readings.signals.MODE(readings.YEAR(timestamp)))MODE(readings.signals.MODE(readings.WEEKDAY(timestamp)))MODE(readings.signals.MODE(readings.MONTH(timestamp)))
turbine_id
T0013.433649e+091.456860e+063448719.01.0192140.0917107.079193374426S012...2.01.0111111201341
T0013.441489e+091.459865e+063453777.01.0187610.0919201.162179374426S012...2.01.0111112201351
T0013.455470e+091.465277e+063463880.01.0181922.7922935.352244374426S012...2.01.0111113201361
T0013.475361e+091.473337e+063474703.01.017666-1.0928248.092869374426S012...2.01.0111114201301
T0012.888083e+091.477958e+063485019.01.0320020.0924186.531200374426S012...2.01.0111115201311
\n", - "

5 rows × 99 columns

\n", - "
" - ], - "text/plain": [ - " SUM(readings.value) STD(readings.value) MAX(readings.value) \\\n", - "turbine_id \n", - "T001 3.433649e+09 1.456860e+06 3448719.0 \n", - "T001 3.441489e+09 1.459865e+06 3453777.0 \n", - "T001 3.455470e+09 1.465277e+06 3463880.0 \n", - "T001 3.475361e+09 1.473337e+06 3474703.0 \n", - "T001 2.888083e+09 1.477958e+06 3485019.0 \n", - "\n", - " SKEW(readings.value) MIN(readings.value) MEAN(readings.value) \\\n", - "turbine_id \n", - "T001 1.019214 0.0 917107.079193 \n", - "T001 1.018761 0.0 919201.162179 \n", - "T001 1.018192 2.7 922935.352244 \n", - "T001 1.017666 -1.0 928248.092869 \n", - "T001 1.032002 0.0 924186.531200 \n", - "\n", - " COUNT(readings) NUM_UNIQUE(readings.signal_id) \\\n", - "turbine_id \n", - "T001 3744 26 \n", - "T001 3744 26 \n", - "T001 3744 26 \n", - "T001 3744 26 \n", - "T001 3744 26 \n", - "\n", - " MODE(readings.signal_id) NUM_UNIQUE(readings.DAY(timestamp)) ... \\\n", - "turbine_id ... \n", - "T001 S01 2 ... \n", - "T001 S01 2 ... \n", - "T001 S01 2 ... \n", - "T001 S01 2 ... \n", - "T001 S01 2 ... \n", - "\n", - " MEAN(readings.signals.NUM_UNIQUE(readings.WEEKDAY(timestamp))) \\\n", - "turbine_id \n", - "T001 2.0 \n", - "T001 2.0 \n", - "T001 2.0 \n", - "T001 2.0 \n", - "T001 2.0 \n", - "\n", - " MEAN(readings.signals.NUM_UNIQUE(readings.YEAR(timestamp))) \\\n", - "turbine_id \n", - "T001 1.0 \n", - "T001 1.0 \n", - "T001 1.0 \n", - "T001 1.0 \n", - "T001 1.0 \n", - "\n", - " NUM_UNIQUE(readings.signals.MODE(readings.DAY(timestamp))) \\\n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "\n", - " NUM_UNIQUE(readings.signals.MODE(readings.YEAR(timestamp))) \\\n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "\n", - " NUM_UNIQUE(readings.signals.MODE(readings.WEEKDAY(timestamp))) \\\n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "\n", - " NUM_UNIQUE(readings.signals.MODE(readings.MONTH(timestamp))) \\\n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "\n", - " MODE(readings.signals.MODE(readings.DAY(timestamp))) \\\n", - "turbine_id \n", - "T001 11 \n", - "T001 12 \n", - "T001 13 \n", - "T001 14 \n", - "T001 15 \n", - "\n", - " MODE(readings.signals.MODE(readings.YEAR(timestamp))) \\\n", - "turbine_id \n", - "T001 2013 \n", - "T001 2013 \n", - "T001 2013 \n", - "T001 2013 \n", - "T001 2013 \n", - "\n", - " MODE(readings.signals.MODE(readings.WEEKDAY(timestamp))) \\\n", - "turbine_id \n", - "T001 4 \n", - "T001 5 \n", - "T001 6 \n", - "T001 0 \n", - "T001 1 \n", - "\n", - " MODE(readings.signals.MODE(readings.MONTH(timestamp))) \n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "\n", - "[5 rows x 99 columns]" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['X'].head()" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "99" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# features generated (the turbine_id is set as index).\n", - "len(context['X'].columns)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
turbine_idsignal_idtimestampvalue
0T001S012013-01-10323.0
1T001S022013-01-10320.0
2T001S032013-01-10284.0
3T001S042013-01-10348.0
4T001S052013-01-10273.0
\n", - "
" - ], - "text/plain": [ - " turbine_id signal_id timestamp value\n", - "0 T001 S01 2013-01-10 323.0\n", - "1 T001 S02 2013-01-10 320.0\n", - "2 T001 S03 2013-01-10 284.0\n", - "3 T001 S04 2013-01-10 348.0\n", - "4 T001 S05 2013-01-10 273.0" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['readings'].head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## mlprimitives.custom.feature_extraction.CategoricalEncoder\n", - "\n", - "* Input: X\n", - "* Output: X (label encoded)\n", - "* Effect: encodes categorical features using OneHotLabelEncoder" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "step = 2\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'entityset', 'X', 'y'])" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
SUM(readings.value)STD(readings.value)MAX(readings.value)SKEW(readings.value)MIN(readings.value)MEAN(readings.value)COUNT(readings)NUM_UNIQUE(readings.signal_id)NUM_UNIQUE(readings.DAY(timestamp))NUM_UNIQUE(readings.MONTH(timestamp))...NUM_UNIQUE(readings.signals.MODE(readings.DAY(timestamp)))NUM_UNIQUE(readings.signals.MODE(readings.YEAR(timestamp)))NUM_UNIQUE(readings.signals.MODE(readings.WEEKDAY(timestamp)))NUM_UNIQUE(readings.signals.MODE(readings.MONTH(timestamp)))MODE(readings.signals.MODE(readings.DAY(timestamp)))MODE(readings.signals.MODE(readings.YEAR(timestamp)))MODE(readings.signals.MODE(readings.WEEKDAY(timestamp)))MODE(readings.signals.MODE(readings.MONTH(timestamp)))MODE(readings.signal_id)=S01MODE(readings.signals.MODE(readings.turbine_id))=T001
turbine_id
T0013.433649e+091.456860e+063448719.01.0192140.0917107.07919337442621...11111120134111
T0013.441489e+091.459865e+063453777.01.0187610.0919201.16217937442621...11111220135111
T0013.455470e+091.465277e+063463880.01.0181922.7922935.35224437442621...11111320136111
T0013.475361e+091.473337e+063474703.01.017666-1.0928248.09286937442621...11111420130111
T0012.888083e+091.477958e+063485019.01.0320020.0924186.53120037442621...11111520131111
\n", - "

5 rows × 99 columns

\n", - "
" - ], - "text/plain": [ - " SUM(readings.value) STD(readings.value) MAX(readings.value) \\\n", - "turbine_id \n", - "T001 3.433649e+09 1.456860e+06 3448719.0 \n", - "T001 3.441489e+09 1.459865e+06 3453777.0 \n", - "T001 3.455470e+09 1.465277e+06 3463880.0 \n", - "T001 3.475361e+09 1.473337e+06 3474703.0 \n", - "T001 2.888083e+09 1.477958e+06 3485019.0 \n", - "\n", - " SKEW(readings.value) MIN(readings.value) MEAN(readings.value) \\\n", - "turbine_id \n", - "T001 1.019214 0.0 917107.079193 \n", - "T001 1.018761 0.0 919201.162179 \n", - "T001 1.018192 2.7 922935.352244 \n", - "T001 1.017666 -1.0 928248.092869 \n", - "T001 1.032002 0.0 924186.531200 \n", - "\n", - " COUNT(readings) NUM_UNIQUE(readings.signal_id) \\\n", - "turbine_id \n", - "T001 3744 26 \n", - "T001 3744 26 \n", - "T001 3744 26 \n", - "T001 3744 26 \n", - "T001 3744 26 \n", - "\n", - " NUM_UNIQUE(readings.DAY(timestamp)) \\\n", - "turbine_id \n", - "T001 2 \n", - "T001 2 \n", - "T001 2 \n", - "T001 2 \n", - "T001 2 \n", - "\n", - " NUM_UNIQUE(readings.MONTH(timestamp)) ... \\\n", - "turbine_id ... \n", - "T001 1 ... \n", - "T001 1 ... \n", - "T001 1 ... \n", - "T001 1 ... \n", - "T001 1 ... \n", - "\n", - " NUM_UNIQUE(readings.signals.MODE(readings.DAY(timestamp))) \\\n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "\n", - " NUM_UNIQUE(readings.signals.MODE(readings.YEAR(timestamp))) \\\n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "\n", - " NUM_UNIQUE(readings.signals.MODE(readings.WEEKDAY(timestamp))) \\\n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "\n", - " NUM_UNIQUE(readings.signals.MODE(readings.MONTH(timestamp))) \\\n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "\n", - " MODE(readings.signals.MODE(readings.DAY(timestamp))) \\\n", - "turbine_id \n", - "T001 11 \n", - "T001 12 \n", - "T001 13 \n", - "T001 14 \n", - "T001 15 \n", - "\n", - " MODE(readings.signals.MODE(readings.YEAR(timestamp))) \\\n", - "turbine_id \n", - "T001 2013 \n", - "T001 2013 \n", - "T001 2013 \n", - "T001 2013 \n", - "T001 2013 \n", - "\n", - " MODE(readings.signals.MODE(readings.WEEKDAY(timestamp))) \\\n", - "turbine_id \n", - "T001 4 \n", - "T001 5 \n", - "T001 6 \n", - "T001 0 \n", - "T001 1 \n", - "\n", - " MODE(readings.signals.MODE(readings.MONTH(timestamp))) \\\n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "\n", - " MODE(readings.signal_id)=S01 \\\n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "\n", - " MODE(readings.signals.MODE(readings.turbine_id))=T001 \n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "\n", - "[5 rows x 99 columns]" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['X'].head()" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
turbine_idsignal_idtimestampvalue
0T001S012013-01-10323.0
1T001S022013-01-10320.0
2T001S032013-01-10284.0
3T001S042013-01-10348.0
4T001S052013-01-10273.0
\n", - "
" - ], - "text/plain": [ - " turbine_id signal_id timestamp value\n", - "0 T001 S01 2013-01-10 323.0\n", - "1 T001 S02 2013-01-10 320.0\n", - "2 T001 S03 2013-01-10 284.0\n", - "3 T001 S04 2013-01-10 348.0\n", - "4 T001 S05 2013-01-10 273.0" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['readings'].head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## xgboost.XGBClassifier\n", - "\n", - "* Input: X (label encoded and featurized)\n", - "* Output: None\n", - "* Effect: trained model" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [], - "source": [ - "step = 3\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'entityset', 'X', 'y'])" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.11" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/tutorials/pipelines/dfs_xgb_with_unstack_normalization.ipynb b/tutorials/pipelines/dfs_xgb_with_unstack_normalization.ipynb deleted file mode 100644 index 5731706..0000000 --- a/tutorials/pipelines/dfs_xgb_with_unstack_normalization.ipynb +++ /dev/null @@ -1,1611 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# dfs_xgb_with_unstack_normalization" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "from draco.demo import load_demo\n", - "\n", - "target_times, readings = load_demo()" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "pipeline_name = 'dfs_xgb_with_unstack_normalization'" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/sarah/opt/anaconda3/envs/draco/lib/python3.7/site-packages/dask/dataframe/utils.py:14: FutureWarning: pandas.util.testing is deprecated. 
Use the functions in the public API at pandas.testing instead.\n", - " import pandas.util.testing as tm\n" - ] - } - ], - "source": [ - "from draco.pipeline import DracoPipeline\n", - "\n", - "pipeline = DracoPipeline(pipeline_name)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['mlblocks.MLPipeline',\n", - " 'mlblocks.MLPipeline',\n", - " 'featuretools.dfs',\n", - " 'mlprimitives.custom.feature_extraction.CategoricalEncoder',\n", - " 'xgboost.XGBClassifier']" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pipeline.template['primitives']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Step by Step execution" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Input Data" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
turbine_idsignal_idtimestampvalue
0T001S012013-01-10323.0
1T001S022013-01-10320.0
2T001S032013-01-10284.0
3T001S042013-01-10348.0
4T001S052013-01-10273.0
\n", - "
" - ], - "text/plain": [ - " turbine_id signal_id timestamp value\n", - "0 T001 S01 2013-01-10 323.0\n", - "1 T001 S02 2013-01-10 320.0\n", - "2 T001 S03 2013-01-10 284.0\n", - "3 T001 S04 2013-01-10 348.0\n", - "4 T001 S05 2013-01-10 273.0" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "readings.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
turbine_idcutoff_timetarget
0T0012013-01-120
1T0012013-01-130
2T0012013-01-140
3T0012013-01-151
4T0012013-01-160
\n", - "
" - ], - "text/plain": [ - " turbine_id cutoff_time target\n", - "0 T001 2013-01-12 0\n", - "1 T001 2013-01-13 0\n", - "2 T001 2013-01-14 0\n", - "3 T001 2013-01-15 1\n", - "4 T001 2013-01-16 0" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "target_times.head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Data Preparation (part of Draco Pipeline)\n", - "\n", - "* Input: target_times, readings, turbines\n", - "* Output: X, y, readings, turbines\n", - "* Effect: target_times has been split into X and y" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## mlblocks.MLPipeline 1\n", - "\n", - "### pandas.DataFrame.resample\n", - "\n", - "* Input: readings\n", - "* Output: readings (resampled)\n", - "* Effect: readings have been resampled to the indicated resample rule and turbine_id,\n", - " signal_id and timestamp have been set as a multi-index\n", - " \n", - "### pandas.DataFrame.unstack\n", - "\n", - "* Input: readings (resampled)\n", - "* Output: readings (unstacked)\n", - "* Effect: readings have been unstacked" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "step = 0\n", - "context = pipeline.fit(target_times, readings, output_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'X', 'y'])" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
turbine_idtimestampvalue_S01value_S02value_S03value_S04value_S05value_S06value_S07value_S08...value_S17value_S18value_S19value_S20value_S21value_S22value_S23value_S24value_S25value_S26
0T0012013-01-10 00:00:00323.0320.0284.0348.0273.0342.0280.03197842.0...11.73131020.055.055.047.058.045.058.047.0356.0
1T0012013-01-10 00:10:00346.0384.0367.0411.0331.0360.0249.03197900.0...10.23131420.058.063.062.067.055.061.042.0400.0
2T0012013-01-10 00:20:00407.0363.0407.0393.0275.0335.0270.03197968.0...9.53131822.068.061.067.066.046.055.045.0402.0
3T0012013-01-10 00:30:00257.0307.0315.0361.0317.0354.0271.03198011.0...10.53132179.043.051.053.062.053.060.045.0357.0
4T0012013-01-10 00:40:00267.0309.0314.0355.0262.0246.0212.03198056.0...9.63132501.045.051.054.059.043.041.036.0322.0
\n", - "

5 rows × 28 columns

\n", - "
" - ], - "text/plain": [ - " turbine_id timestamp value_S01 value_S02 value_S03 value_S04 \\\n", - "0 T001 2013-01-10 00:00:00 323.0 320.0 284.0 348.0 \n", - "1 T001 2013-01-10 00:10:00 346.0 384.0 367.0 411.0 \n", - "2 T001 2013-01-10 00:20:00 407.0 363.0 407.0 393.0 \n", - "3 T001 2013-01-10 00:30:00 257.0 307.0 315.0 361.0 \n", - "4 T001 2013-01-10 00:40:00 267.0 309.0 314.0 355.0 \n", - "\n", - " value_S05 value_S06 value_S07 value_S08 ... value_S17 value_S18 \\\n", - "0 273.0 342.0 280.0 3197842.0 ... 11.7 3131020.0 \n", - "1 331.0 360.0 249.0 3197900.0 ... 10.2 3131420.0 \n", - "2 275.0 335.0 270.0 3197968.0 ... 9.5 3131822.0 \n", - "3 317.0 354.0 271.0 3198011.0 ... 10.5 3132179.0 \n", - "4 262.0 246.0 212.0 3198056.0 ... 9.6 3132501.0 \n", - "\n", - " value_S19 value_S20 value_S21 value_S22 value_S23 value_S24 \\\n", - "0 55.0 55.0 47.0 58.0 45.0 58.0 \n", - "1 58.0 63.0 62.0 67.0 55.0 61.0 \n", - "2 68.0 61.0 67.0 66.0 46.0 55.0 \n", - "3 43.0 51.0 53.0 62.0 53.0 60.0 \n", - "4 45.0 51.0 54.0 59.0 43.0 41.0 \n", - "\n", - " value_S25 value_S26 \n", - "0 47.0 356.0 \n", - "1 42.0 400.0 \n", - "2 45.0 402.0 \n", - "3 45.0 357.0 \n", - "4 36.0 322.0 \n", - "\n", - "[5 rows x 28 columns]" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['readings'].head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## mlblocks.MLPipeline 2\n", - "\n", - "### featuretools.EntitySet.entity_from_dataframe\n", - "\n", - "* Input: readings (resampled)\n", - "* Output: entityset\n", - "* Effect: Entityset has been generated from readings\n", - "\n", - "### featuretools.EntitySet.normalize_entity\n", - "\n", - "* Input: entityset\n", - "* Output: entityset with relationship (readings.turbine_id with turbines.turbine_id)\n", - "* Effect: establish relation between readings and turbines" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "step = 1\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'X', 'y', 'entityset'])" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Entityset: entityset\n", - " Entities:\n", - " readings [Rows: 51121, Columns: 29]\n", - " turbines [Rows: 1, Columns: 1]\n", - " Relationships:\n", - " readings.turbine_id -> turbines.turbine_id" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['entityset']" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## featuretools.dfs\n", - "\n", - "* Input: entityset (unstacked, no turbine_id, no timestamp)\n", - "* Output: X (has additional features)\n", - "* Effect: build features for relational dataset using DFS" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "metadata": {}, - "outputs": [], - "source": [ - "step = 2\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'entityset', 'X', 'y'])" - ] - }, - 
"execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys() " - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
SUM(readings.value_S14)SUM(readings.value_S11)SUM(readings.value_S25)SUM(readings.value_S23)SUM(readings.value_S17)SUM(readings.value_S19)SUM(readings.value_S04)SUM(readings.value_S05)SUM(readings.value_S21)SUM(readings.value_S16)...MEAN(readings.value_S20)COUNT(readings)NUM_UNIQUE(readings.YEAR(timestamp))NUM_UNIQUE(readings.MONTH(timestamp))NUM_UNIQUE(readings.DAY(timestamp))NUM_UNIQUE(readings.WEEKDAY(timestamp))MODE(readings.YEAR(timestamp))MODE(readings.MONTH(timestamp))MODE(readings.DAY(timestamp))MODE(readings.WEEKDAY(timestamp))
turbine_id
T001465421817.0496362516.02743.02780.0994.63174.019412.017083.03061.0550.4...22.326389144112220131114
T001465897578.0496952628.04237.04640.01166.75112.038289.034344.04919.0713.7...35.166667144112220131125
T001466806830.0498019072.09008.09179.01581.79134.086707.078749.09863.0916.3...53.381944144112220131136
T001468250434.0499530451.010073.010310.01690.910674.087907.083264.010638.0970.6...61.423611144112220131140
T001371675934.0400196323.07381.08228.01666.08831.068811.064088.08629.0948.8...87.575221144112220131151
\n", - "

5 rows × 165 columns

\n", - "
" - ], - "text/plain": [ - " SUM(readings.value_S14) SUM(readings.value_S11) \\\n", - "turbine_id \n", - "T001 465421817.0 496362516.0 \n", - "T001 465897578.0 496952628.0 \n", - "T001 466806830.0 498019072.0 \n", - "T001 468250434.0 499530451.0 \n", - "T001 371675934.0 400196323.0 \n", - "\n", - " SUM(readings.value_S25) SUM(readings.value_S23) \\\n", - "turbine_id \n", - "T001 2743.0 2780.0 \n", - "T001 4237.0 4640.0 \n", - "T001 9008.0 9179.0 \n", - "T001 10073.0 10310.0 \n", - "T001 7381.0 8228.0 \n", - "\n", - " SUM(readings.value_S17) SUM(readings.value_S19) \\\n", - "turbine_id \n", - "T001 994.6 3174.0 \n", - "T001 1166.7 5112.0 \n", - "T001 1581.7 9134.0 \n", - "T001 1690.9 10674.0 \n", - "T001 1666.0 8831.0 \n", - "\n", - " SUM(readings.value_S04) SUM(readings.value_S05) \\\n", - "turbine_id \n", - "T001 19412.0 17083.0 \n", - "T001 38289.0 34344.0 \n", - "T001 86707.0 78749.0 \n", - "T001 87907.0 83264.0 \n", - "T001 68811.0 64088.0 \n", - "\n", - " SUM(readings.value_S21) SUM(readings.value_S16) ... \\\n", - "turbine_id ... \n", - "T001 3061.0 550.4 ... \n", - "T001 4919.0 713.7 ... \n", - "T001 9863.0 916.3 ... \n", - "T001 10638.0 970.6 ... \n", - "T001 8629.0 948.8 ... \n", - "\n", - " MEAN(readings.value_S20) COUNT(readings) \\\n", - "turbine_id \n", - "T001 22.326389 144 \n", - "T001 35.166667 144 \n", - "T001 53.381944 144 \n", - "T001 61.423611 144 \n", - "T001 87.575221 144 \n", - "\n", - " NUM_UNIQUE(readings.YEAR(timestamp)) \\\n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "\n", - " NUM_UNIQUE(readings.MONTH(timestamp)) \\\n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "\n", - " NUM_UNIQUE(readings.DAY(timestamp)) \\\n", - "turbine_id \n", - "T001 2 \n", - "T001 2 \n", - "T001 2 \n", - "T001 2 \n", - "T001 2 \n", - "\n", - " NUM_UNIQUE(readings.WEEKDAY(timestamp)) \\\n", - "turbine_id \n", - "T001 2 \n", - "T001 2 \n", - "T001 2 \n", - "T001 2 \n", - "T001 2 \n", - "\n", - " MODE(readings.YEAR(timestamp)) MODE(readings.MONTH(timestamp)) \\\n", - "turbine_id \n", - "T001 2013 1 \n", - "T001 2013 1 \n", - "T001 2013 1 \n", - "T001 2013 1 \n", - "T001 2013 1 \n", - "\n", - " MODE(readings.DAY(timestamp)) MODE(readings.WEEKDAY(timestamp)) \n", - "turbine_id \n", - "T001 11 4 \n", - "T001 12 5 \n", - "T001 13 6 \n", - "T001 14 0 \n", - "T001 15 1 \n", - "\n", - "[5 rows x 165 columns]" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['X'].head()" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "165" - ] - }, - "execution_count": 16, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# features generated (the turbine_id is set as index).\n", - "len(context['X'].columns)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## mlprimitives.custom.feature_extraction.CategoricalEncoder\n", - "\n", - "* Input: X\n", - "* Output: X (label encoded)\n", - "* Effect: encodes categorical features using OneHotLabelEncoder" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [], - "source": [ - "step = 3\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 
'entityset', 'X', 'y'])" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
SUM(readings.value_S14)SUM(readings.value_S11)SUM(readings.value_S25)SUM(readings.value_S23)SUM(readings.value_S17)SUM(readings.value_S19)SUM(readings.value_S04)SUM(readings.value_S05)SUM(readings.value_S21)SUM(readings.value_S16)...MEAN(readings.value_S20)COUNT(readings)NUM_UNIQUE(readings.YEAR(timestamp))NUM_UNIQUE(readings.MONTH(timestamp))NUM_UNIQUE(readings.DAY(timestamp))NUM_UNIQUE(readings.WEEKDAY(timestamp))MODE(readings.YEAR(timestamp))MODE(readings.MONTH(timestamp))MODE(readings.DAY(timestamp))MODE(readings.WEEKDAY(timestamp))
turbine_id
T001465421817.0496362516.02743.02780.0994.63174.019412.017083.03061.0550.4...22.326389144112220131114
T001465897578.0496952628.04237.04640.01166.75112.038289.034344.04919.0713.7...35.166667144112220131125
T001466806830.0498019072.09008.09179.01581.79134.086707.078749.09863.0916.3...53.381944144112220131136
T001468250434.0499530451.010073.010310.01690.910674.087907.083264.010638.0970.6...61.423611144112220131140
T001371675934.0400196323.07381.08228.01666.08831.068811.064088.08629.0948.8...87.575221144112220131151
\n", - "

5 rows × 165 columns

\n", - "
" - ], - "text/plain": [ - " SUM(readings.value_S14) SUM(readings.value_S11) \\\n", - "turbine_id \n", - "T001 465421817.0 496362516.0 \n", - "T001 465897578.0 496952628.0 \n", - "T001 466806830.0 498019072.0 \n", - "T001 468250434.0 499530451.0 \n", - "T001 371675934.0 400196323.0 \n", - "\n", - " SUM(readings.value_S25) SUM(readings.value_S23) \\\n", - "turbine_id \n", - "T001 2743.0 2780.0 \n", - "T001 4237.0 4640.0 \n", - "T001 9008.0 9179.0 \n", - "T001 10073.0 10310.0 \n", - "T001 7381.0 8228.0 \n", - "\n", - " SUM(readings.value_S17) SUM(readings.value_S19) \\\n", - "turbine_id \n", - "T001 994.6 3174.0 \n", - "T001 1166.7 5112.0 \n", - "T001 1581.7 9134.0 \n", - "T001 1690.9 10674.0 \n", - "T001 1666.0 8831.0 \n", - "\n", - " SUM(readings.value_S04) SUM(readings.value_S05) \\\n", - "turbine_id \n", - "T001 19412.0 17083.0 \n", - "T001 38289.0 34344.0 \n", - "T001 86707.0 78749.0 \n", - "T001 87907.0 83264.0 \n", - "T001 68811.0 64088.0 \n", - "\n", - " SUM(readings.value_S21) SUM(readings.value_S16) ... \\\n", - "turbine_id ... \n", - "T001 3061.0 550.4 ... \n", - "T001 4919.0 713.7 ... \n", - "T001 9863.0 916.3 ... \n", - "T001 10638.0 970.6 ... \n", - "T001 8629.0 948.8 ... \n", - "\n", - " MEAN(readings.value_S20) COUNT(readings) \\\n", - "turbine_id \n", - "T001 22.326389 144 \n", - "T001 35.166667 144 \n", - "T001 53.381944 144 \n", - "T001 61.423611 144 \n", - "T001 87.575221 144 \n", - "\n", - " NUM_UNIQUE(readings.YEAR(timestamp)) \\\n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "\n", - " NUM_UNIQUE(readings.MONTH(timestamp)) \\\n", - "turbine_id \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "T001 1 \n", - "\n", - " NUM_UNIQUE(readings.DAY(timestamp)) \\\n", - "turbine_id \n", - "T001 2 \n", - "T001 2 \n", - "T001 2 \n", - "T001 2 \n", - "T001 2 \n", - "\n", - " NUM_UNIQUE(readings.WEEKDAY(timestamp)) \\\n", - "turbine_id \n", - "T001 2 \n", - "T001 2 \n", - "T001 2 \n", - "T001 2 \n", - "T001 2 \n", - "\n", - " MODE(readings.YEAR(timestamp)) MODE(readings.MONTH(timestamp)) \\\n", - "turbine_id \n", - "T001 2013 1 \n", - "T001 2013 1 \n", - "T001 2013 1 \n", - "T001 2013 1 \n", - "T001 2013 1 \n", - "\n", - " MODE(readings.DAY(timestamp)) MODE(readings.WEEKDAY(timestamp)) \n", - "turbine_id \n", - "T001 11 4 \n", - "T001 12 5 \n", - "T001 13 6 \n", - "T001 14 0 \n", - "T001 15 1 \n", - "\n", - "[5 rows x 165 columns]" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['X'].head()" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
turbine_idtimestampvalue_S01value_S02value_S03value_S04value_S05value_S06value_S07value_S08...value_S17value_S18value_S19value_S20value_S21value_S22value_S23value_S24value_S25value_S26
0T0012013-01-10 00:00:00323.0320.0284.0348.0273.0342.0280.03197842.0...11.73131020.055.055.047.058.045.058.047.0356.0
1T0012013-01-10 00:10:00346.0384.0367.0411.0331.0360.0249.03197900.0...10.23131420.058.063.062.067.055.061.042.0400.0
2T0012013-01-10 00:20:00407.0363.0407.0393.0275.0335.0270.03197968.0...9.53131822.068.061.067.066.046.055.045.0402.0
3T0012013-01-10 00:30:00257.0307.0315.0361.0317.0354.0271.03198011.0...10.53132179.043.051.053.062.053.060.045.0357.0
4T0012013-01-10 00:40:00267.0309.0314.0355.0262.0246.0212.03198056.0...9.63132501.045.051.054.059.043.041.036.0322.0
\n", - "

5 rows × 28 columns

\n", - "
" - ], - "text/plain": [ - " turbine_id timestamp value_S01 value_S02 value_S03 value_S04 \\\n", - "0 T001 2013-01-10 00:00:00 323.0 320.0 284.0 348.0 \n", - "1 T001 2013-01-10 00:10:00 346.0 384.0 367.0 411.0 \n", - "2 T001 2013-01-10 00:20:00 407.0 363.0 407.0 393.0 \n", - "3 T001 2013-01-10 00:30:00 257.0 307.0 315.0 361.0 \n", - "4 T001 2013-01-10 00:40:00 267.0 309.0 314.0 355.0 \n", - "\n", - " value_S05 value_S06 value_S07 value_S08 ... value_S17 value_S18 \\\n", - "0 273.0 342.0 280.0 3197842.0 ... 11.7 3131020.0 \n", - "1 331.0 360.0 249.0 3197900.0 ... 10.2 3131420.0 \n", - "2 275.0 335.0 270.0 3197968.0 ... 9.5 3131822.0 \n", - "3 317.0 354.0 271.0 3198011.0 ... 10.5 3132179.0 \n", - "4 262.0 246.0 212.0 3198056.0 ... 9.6 3132501.0 \n", - "\n", - " value_S19 value_S20 value_S21 value_S22 value_S23 value_S24 \\\n", - "0 55.0 55.0 47.0 58.0 45.0 58.0 \n", - "1 58.0 63.0 62.0 67.0 55.0 61.0 \n", - "2 68.0 61.0 67.0 66.0 46.0 55.0 \n", - "3 43.0 51.0 53.0 62.0 53.0 60.0 \n", - "4 45.0 51.0 54.0 59.0 43.0 41.0 \n", - "\n", - " value_S25 value_S26 \n", - "0 47.0 356.0 \n", - "1 42.0 400.0 \n", - "2 45.0 402.0 \n", - "3 45.0 357.0 \n", - "4 36.0 322.0 \n", - "\n", - "[5 rows x 28 columns]" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context['readings'].head()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## xgboost.XGBClassifier\n", - "\n", - "* Input: X (label encoded and featurized)\n", - "* Output: None\n", - "* Effect: trained model" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "metadata": {}, - "outputs": [], - "source": [ - "step = 4\n", - "context = pipeline.fit(**context, output_=step, start_=step)" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "dict_keys(['readings', 'turbines', 'entityset', 'X', 'y'])" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "context.keys()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.11" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} From 8a580338edf93825dd9c6688025f7dc5a0482c4c Mon Sep 17 00:00:00 2001 From: Sarah Alnegheimish <40212131+sarahmish@users.noreply.github.com> Date: Sun, 23 Apr 2023 19:24:19 -0400 Subject: [PATCH 164/171] Migrate mlstars (#75) * remove dfs pipelines * change mlstars * change mlprimitives to mlstars * fix tutorials * update mlstars version --- draco/pipeline.py | 2 +- draco/pipelines/double_lstm/double_lstm.json | 6 +- .../double_lstm/double_lstm_prob.json | 6 +- .../double_lstm_prob_with_unstack.json | 6 +- .../double_lstm/double_lstm_with_unstack.json | 6 +- draco/pipelines/dummy/dummy.json | 10 +- draco/pipelines/lstm/lstm.json | 6 +- draco/pipelines/lstm/lstm_prob.json | 6 +- .../lstm/lstm_prob_with_unstack.json | 6 +- draco/pipelines/lstm/lstm_with_unstack.json | 6 +- .../lstm_regressor/lstm_regressor.json | 6 +- .../lstm_regressor_with_unstack.json | 6 +- setup.py | 5 +- tests/test_pipeline.py | 8 + .../pipelines/double_lstm_with_unstack.ipynb | 2097 +++++++------- .../lstm_regressor_with_unstack.ipynb | 2416 
++++++++--------- tutorials/pipelines/lstm_with_unstack.ipynb | 1845 +++++++------ 17 files changed, 3331 insertions(+), 3112 deletions(-) diff --git a/draco/pipeline.py b/draco/pipeline.py index 6a9adf6..fee44aa 100644 --- a/draco/pipeline.py +++ b/draco/pipeline.py @@ -15,7 +15,7 @@ from btb.tuning import Tunable from mlblocks import MLPipeline from mlblocks.discovery import load_pipeline -from mlprimitives.adapters.keras import Sequential +from mlstars.adapters.keras import Sequential from sklearn.exceptions import NotFittedError from sklearn.model_selection import KFold, StratifiedKFold diff --git a/draco/pipelines/double_lstm/double_lstm.json b/draco/pipelines/double_lstm/double_lstm.json index e3be8a5..8e5e4fd 100644 --- a/draco/pipelines/double_lstm/double_lstm.json +++ b/draco/pipelines/double_lstm/double_lstm.json @@ -7,7 +7,7 @@ "pandas.DataFrame", "pandas.DataFrame.set", "pandas.DataFrame.set", - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences", + "mlstars.custom.timeseries_preprocessing.cutoff_window_sequences", "keras.Sequential.DoubleLSTMTimeSeriesClassifier" ], "init_params": { @@ -33,7 +33,7 @@ "pandas.DataFrame.set#2": { "key": "timestamp" }, - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "mlstars.custom.timeseries_preprocessing.cutoff_window_sequences#1": { "window_size": 24, "cutoff_time": "cutoff_time", "time_index": "timestamp" @@ -67,7 +67,7 @@ "X": "readings", "value": "timestamp" }, - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "mlstars.custom.timeseries_preprocessing.cutoff_window_sequences#1": { "timeseries": "readings" } }, diff --git a/draco/pipelines/double_lstm/double_lstm_prob.json b/draco/pipelines/double_lstm/double_lstm_prob.json index a118af0..0a20648 100644 --- a/draco/pipelines/double_lstm/double_lstm_prob.json +++ b/draco/pipelines/double_lstm/double_lstm_prob.json @@ -7,7 +7,7 @@ "pandas.DataFrame", "pandas.DataFrame.set", "pandas.DataFrame.set", - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences", + "mlstars.custom.timeseries_preprocessing.cutoff_window_sequences", "keras.Sequential.DoubleLSTMTimeSeriesClassifier", "numpy.take" ], @@ -34,7 +34,7 @@ "pandas.DataFrame.set#2": { "key": "timestamp" }, - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "mlstars.custom.timeseries_preprocessing.cutoff_window_sequences#1": { "window_size": 24, "cutoff_time": "cutoff_time", "time_index": "timestamp" @@ -74,7 +74,7 @@ "X": "readings", "value": "timestamp" }, - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "mlstars.custom.timeseries_preprocessing.cutoff_window_sequences#1": { "timeseries": "readings" } }, diff --git a/draco/pipelines/double_lstm/double_lstm_prob_with_unstack.json b/draco/pipelines/double_lstm/double_lstm_prob_with_unstack.json index ea48a87..b46c4e3 100644 --- a/draco/pipelines/double_lstm/double_lstm_prob_with_unstack.json +++ b/draco/pipelines/double_lstm/double_lstm_prob_with_unstack.json @@ -9,7 +9,7 @@ "pandas.DataFrame", "pandas.DataFrame.set", "pandas.DataFrame.set", - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences", + "mlstars.custom.timeseries_preprocessing.cutoff_window_sequences", "keras.Sequential.DoubleLSTMTimeSeriesClassifier", "numpy.take" ], @@ -50,7 +50,7 @@ "pandas.DataFrame.set#2": { "key": "timestamp" }, - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + 
"mlstars.custom.timeseries_preprocessing.cutoff_window_sequences#1": { "window_size": 24, "cutoff_time": "cutoff_time", "time_index": "timestamp" @@ -96,7 +96,7 @@ "X": "readings", "value": "timestamp" }, - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "mlstars.custom.timeseries_preprocessing.cutoff_window_sequences#1": { "timeseries": "readings" } }, diff --git a/draco/pipelines/double_lstm/double_lstm_with_unstack.json b/draco/pipelines/double_lstm/double_lstm_with_unstack.json index dede502..f1992a6 100644 --- a/draco/pipelines/double_lstm/double_lstm_with_unstack.json +++ b/draco/pipelines/double_lstm/double_lstm_with_unstack.json @@ -9,7 +9,7 @@ "pandas.DataFrame", "pandas.DataFrame.set", "pandas.DataFrame.set", - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences", + "mlstars.custom.timeseries_preprocessing.cutoff_window_sequences", "keras.Sequential.DoubleLSTMTimeSeriesClassifier" ], "init_params": { @@ -49,7 +49,7 @@ "pandas.DataFrame.set#2": { "key": "timestamp" }, - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "mlstars.custom.timeseries_preprocessing.cutoff_window_sequences#1": { "window_size": 24, "cutoff_time": "cutoff_time", "time_index": "timestamp" @@ -89,7 +89,7 @@ "X": "readings", "value": "timestamp" }, - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "mlstars.custom.timeseries_preprocessing.cutoff_window_sequences#1": { "timeseries": "readings" } }, diff --git a/draco/pipelines/dummy/dummy.json b/draco/pipelines/dummy/dummy.json index a28121e..6175f5b 100644 --- a/draco/pipelines/dummy/dummy.json +++ b/draco/pipelines/dummy/dummy.json @@ -1,11 +1,7 @@ { "primitives": [ - "mlprimitives.custom.preprocessing.ClassEncoder", - "mlprimitives.custom.feature_extraction.DatetimeFeaturizer", - "mlprimitives.custom.feature_extraction.CategoricalEncoder", - "mlprimitives.custom.feature_extraction.StringVectorizer", "sklearn.impute.SimpleImputer", - "sklearn.linear_model.LogisticRegression", - "mlprimitives.custom.preprocessing.ClassDecoder" + "sklearn.preprocessing.MinMaxScaler", + "sklearn.linear_model.LogisticRegression" ] -} \ No newline at end of file +} diff --git a/draco/pipelines/lstm/lstm.json b/draco/pipelines/lstm/lstm.json index c29b1c7..b430fb0 100644 --- a/draco/pipelines/lstm/lstm.json +++ b/draco/pipelines/lstm/lstm.json @@ -7,7 +7,7 @@ "pandas.DataFrame", "pandas.DataFrame.set", "pandas.DataFrame.set", - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences", + "mlstars.custom.timeseries_preprocessing.cutoff_window_sequences", "keras.Sequential.LSTMTimeSeriesClassifier" ], "init_params": { @@ -33,7 +33,7 @@ "pandas.DataFrame.set#2": { "key": "timestamp" }, - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "mlstars.custom.timeseries_preprocessing.cutoff_window_sequences#1": { "window_size": 24, "cutoff_time": "cutoff_time", "time_index": "timestamp" @@ -67,7 +67,7 @@ "X": "readings", "value": "timestamp" }, - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "mlstars.custom.timeseries_preprocessing.cutoff_window_sequences#1": { "timeseries": "readings" } }, diff --git a/draco/pipelines/lstm/lstm_prob.json b/draco/pipelines/lstm/lstm_prob.json index 17da404..df135ba 100644 --- a/draco/pipelines/lstm/lstm_prob.json +++ b/draco/pipelines/lstm/lstm_prob.json @@ -7,7 +7,7 @@ "pandas.DataFrame", "pandas.DataFrame.set", "pandas.DataFrame.set", - 
"mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences", + "mlstars.custom.timeseries_preprocessing.cutoff_window_sequences", "keras.Sequential.LSTMTimeSeriesClassifier", "numpy.take" ], @@ -34,7 +34,7 @@ "pandas.DataFrame.set#2": { "key": "timestamp" }, - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "mlstars.custom.timeseries_preprocessing.cutoff_window_sequences#1": { "window_size": 24, "cutoff_time": "cutoff_time", "time_index": "timestamp" @@ -74,7 +74,7 @@ "X": "readings", "value": "timestamp" }, - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "mlstars.custom.timeseries_preprocessing.cutoff_window_sequences#1": { "timeseries": "readings" } }, diff --git a/draco/pipelines/lstm/lstm_prob_with_unstack.json b/draco/pipelines/lstm/lstm_prob_with_unstack.json index 9272257..2bf6172 100644 --- a/draco/pipelines/lstm/lstm_prob_with_unstack.json +++ b/draco/pipelines/lstm/lstm_prob_with_unstack.json @@ -9,7 +9,7 @@ "pandas.DataFrame", "pandas.DataFrame.set", "pandas.DataFrame.set", - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences", + "mlstars.custom.timeseries_preprocessing.cutoff_window_sequences", "keras.Sequential.LSTMTimeSeriesClassifier", "numpy.take" ], @@ -50,7 +50,7 @@ "pandas.DataFrame.set#2": { "key": "timestamp" }, - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "mlstars.custom.timeseries_preprocessing.cutoff_window_sequences#1": { "window_size": 24, "cutoff_time": "cutoff_time", "time_index": "timestamp" @@ -96,7 +96,7 @@ "X": "readings", "value": "timestamp" }, - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "mlstars.custom.timeseries_preprocessing.cutoff_window_sequences#1": { "timeseries": "readings" } }, diff --git a/draco/pipelines/lstm/lstm_with_unstack.json b/draco/pipelines/lstm/lstm_with_unstack.json index ab9dd99..86ffc4b 100644 --- a/draco/pipelines/lstm/lstm_with_unstack.json +++ b/draco/pipelines/lstm/lstm_with_unstack.json @@ -9,7 +9,7 @@ "pandas.DataFrame", "pandas.DataFrame.set", "pandas.DataFrame.set", - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences", + "mlstars.custom.timeseries_preprocessing.cutoff_window_sequences", "keras.Sequential.LSTMTimeSeriesClassifier" ], "init_params": { @@ -49,7 +49,7 @@ "pandas.DataFrame.set#2": { "key": "timestamp" }, - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "mlstars.custom.timeseries_preprocessing.cutoff_window_sequences#1": { "window_size": 24, "cutoff_time": "cutoff_time", "time_index": "timestamp" @@ -89,7 +89,7 @@ "X": "readings", "value": "timestamp" }, - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "mlstars.custom.timeseries_preprocessing.cutoff_window_sequences#1": { "timeseries": "readings" } }, diff --git a/draco/pipelines/lstm_regressor/lstm_regressor.json b/draco/pipelines/lstm_regressor/lstm_regressor.json index 77ddb1e..82ed77f 100644 --- a/draco/pipelines/lstm_regressor/lstm_regressor.json +++ b/draco/pipelines/lstm_regressor/lstm_regressor.json @@ -7,7 +7,7 @@ "pandas.DataFrame", "pandas.DataFrame.set", "pandas.DataFrame.set", - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences", + "mlstars.custom.timeseries_preprocessing.cutoff_window_sequences", "keras.Sequential.LSTMTimeSeriesRegressor" ], "init_params": { @@ -33,7 +33,7 @@ "pandas.DataFrame.set#2": { "key": "timestamp" }, - 
"mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "mlstars.custom.timeseries_preprocessing.cutoff_window_sequences#1": { "window_size": 24, "cutoff_time": "cutoff_time", "time_index": "timestamp" @@ -67,7 +67,7 @@ "X": "readings", "value": "timestamp" }, - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "mlstars.custom.timeseries_preprocessing.cutoff_window_sequences#1": { "timeseries": "readings" } }, diff --git a/draco/pipelines/lstm_regressor/lstm_regressor_with_unstack.json b/draco/pipelines/lstm_regressor/lstm_regressor_with_unstack.json index 9e183b9..225c47e 100644 --- a/draco/pipelines/lstm_regressor/lstm_regressor_with_unstack.json +++ b/draco/pipelines/lstm_regressor/lstm_regressor_with_unstack.json @@ -9,7 +9,7 @@ "pandas.DataFrame", "pandas.DataFrame.set", "pandas.DataFrame.set", - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences", + "mlstars.custom.timeseries_preprocessing.cutoff_window_sequences", "keras.Sequential.LSTMTimeSeriesRegressor" ], "init_params": { @@ -49,7 +49,7 @@ "pandas.DataFrame.set#2": { "key": "timestamp" }, - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "mlstars.custom.timeseries_preprocessing.cutoff_window_sequences#1": { "window_size": 24, "cutoff_time": "cutoff_time", "time_index": "timestamp" @@ -89,7 +89,7 @@ "X": "readings", "value": "timestamp" }, - "mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1": { + "mlstars.custom.timeseries_preprocessing.cutoff_window_sequences#1": { "timeseries": "readings" } }, diff --git a/setup.py b/setup.py index 1dfcc4b..9087746 100644 --- a/setup.py +++ b/setup.py @@ -17,7 +17,7 @@ install_requires = [ 'baytune>=0.4.0,<0.5', - 'mlprimitives>=0.3.2,<0.4', + 'ml-stars>=0.1.0', 'mlblocks>=0.4.0,<0.5', 'pymongo>=3.7.2,<4', 'scikit-learn>=0.21,<1.2', @@ -30,11 +30,10 @@ 'fsspec>=0.8.5,<0.9', 'dask>=2.6.0,<3', 'tabulate>=0.8.3,<0.9', - 'xlsxwriter>=1.3.6<1.4', + 'xlsxwriter>=1.3.6,<1.4', # fix conflicts 'protobuf<4', 'importlib-metadata<5', - #'importlib-metadata<2,>=0.12', ] setup_requires = [ diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index 3b7359f..b27de6f 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -7,6 +7,7 @@ import pandas as pd import pytest +from mlblocks import MLPipeline from draco.pipeline import DracoPipeline, get_pipelines @@ -28,6 +29,13 @@ def test_get_pipelines_type_error(): get_pipelines(pipeline_type='does-not-exist') +def test_loading_pipelines(): + draco_pipelines = get_pipelines() + for pipeline in draco_pipelines: + mlpipeline = MLPipeline(pipeline) + assert isinstance(mlpipeline, MLPipeline) + + class TestDracoPipeline(TestCase): def _get_data(self): diff --git a/tutorials/pipelines/double_lstm_with_unstack.ipynb b/tutorials/pipelines/double_lstm_with_unstack.ipynb index 4bc7d0f..043dae8 100644 --- a/tutorials/pipelines/double_lstm_with_unstack.ipynb +++ b/tutorials/pipelines/double_lstm_with_unstack.ipynb @@ -46,7 +46,8 @@ { "data": { "text/plain": [ - "['mlblocks.MLPipeline',\n", + "['pandas.DataFrame.resample',\n", + " 'pandas.DataFrame.unstack',\n", " 'pandas.DataFrame.pop',\n", " 'pandas.DataFrame.pop',\n", " 'sklearn.impute.SimpleImputer',\n", @@ -54,7 +55,7 @@ " 'pandas.DataFrame',\n", " 'pandas.DataFrame.set',\n", " 'pandas.DataFrame.set',\n", - " 'mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences',\n", + " 'mlstars.custom.timeseries_preprocessing.cutoff_window_sequences',\n", " 
'keras.Sequential.DoubleLSTMTimeSeriesClassifier']" ] }, @@ -270,20 +271,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## mlblocks.MLPipeline\n", - "\n", - "### pandas.DataFrame.resample\n", + "## pandas.DataFrame.resample\n", "\n", "* Input: readings\n", "* Output: readings (resampled)\n", "* Effect: readings have been resampled to the indicated resample rule and turbine_id,\n", - " signal_id and timestamp have been set as a multi-index\n", - " \n", - "### pandas.DataFrame.unstack\n", - "\n", - "* Input: readings (resampled)\n", - "* Output: readings (unstacked)\n", - "* Effect: readings have been unstacked" + " signal_id and timestamp have been set as a multi-index\n" ] }, { @@ -319,6 +312,130 @@ "cell_type": "code", "execution_count": 9, "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
value
turbine_idsignal_idtimestamp
T001S012013-01-10 00:00:00313.333333
2013-01-10 01:00:00197.500000
2013-01-10 02:00:00248.166667
2013-01-10 03:00:00253.166667
2013-01-10 04:00:00305.000000
\n", + "
" + ], + "text/plain": [ + " value\n", + "turbine_id signal_id timestamp \n", + "T001 S01 2013-01-10 00:00:00 313.333333\n", + " 2013-01-10 01:00:00 197.500000\n", + " 2013-01-10 02:00:00 248.166667\n", + " 2013-01-10 03:00:00 253.166667\n", + " 2013-01-10 04:00:00 305.000000" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## pandas.DataFrame.unstack\n", + "\n", + "* Input: readings (resampled)\n", + "* Output: readings (unstacked)\n", + "* Effect: readings have been unstacked" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "step = 1\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'X', 'y'])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, "outputs": [ { "data": { @@ -369,121 +486,121 @@ " 0\n", " T001\n", " 2013-01-10 00:00:00\n", - " 323.0\n", - " 320.0\n", - " 284.0\n", - " 348.0\n", - " 273.0\n", - " 342.0\n", - " 280.0\n", - " 3197842.0\n", + " 313.333333\n", + " 323.833333\n", + " 336.000000\n", + " 364.666667\n", + " 286.500000\n", + " 314.000000\n", + " 243.166667\n", + " 3.197980e+06\n", " ...\n", - " 11.7\n", - " 3131020.0\n", - " 55.0\n", - " 55.0\n", - " 47.0\n", - " 58.0\n", - " 45.0\n", - " 58.0\n", - " 47.0\n", - " 356.0\n", + " 10.383333\n", + " 3.131958e+06\n", + " 52.666667\n", + " 54.333333\n", + " 56.166667\n", + " 61.000000\n", + " 47.666667\n", + " 52.666667\n", + " 40.833333\n", + " 357.333333\n", " \n", " \n", " 1\n", " T001\n", - " 2013-01-10 00:10:00\n", - " 346.0\n", - " 384.0\n", - " 367.0\n", - " 411.0\n", - " 331.0\n", - " 360.0\n", - " 249.0\n", - " 3197900.0\n", + " 2013-01-10 01:00:00\n", + " 197.500000\n", + " 221.333333\n", + " 216.000000\n", + " 260.666667\n", + " 206.833333\n", + " 235.833333\n", + " 186.666667\n", + " 3.198221e+06\n", " ...\n", - " 10.2\n", - " 3131420.0\n", - " 58.0\n", - " 63.0\n", - " 62.0\n", - " 67.0\n", - " 55.0\n", - " 61.0\n", - " 42.0\n", - " 400.0\n", + " 8.666667\n", + " 3.133668e+06\n", + " 33.166667\n", + " 37.000000\n", + " 36.166667\n", + " 43.666667\n", + " 34.500000\n", + " 39.333333\n", + " 31.166667\n", + " 249.666667\n", " \n", " \n", " 2\n", " T001\n", - " 2013-01-10 00:20:00\n", - " 407.0\n", - " 363.0\n", - " 407.0\n", - " 393.0\n", - " 275.0\n", - " 335.0\n", - " 270.0\n", - " 3197968.0\n", + " 2013-01-10 02:00:00\n", + " 248.166667\n", + " 271.666667\n", + " 277.500000\n", + " 298.000000\n", + " 233.666667\n", + " 271.166667\n", + " 216.333333\n", + " 3.198448e+06\n", " ...\n", - " 9.5\n", - " 3131822.0\n", - " 68.0\n", - " 61.0\n", - " 67.0\n", - " 66.0\n", - " 46.0\n", - " 55.0\n", - " 45.0\n", - " 402.0\n", + " 8.833333\n", + " 3.135413e+06\n", + " 41.500000\n", + " 45.666667\n", + " 46.500000\n", + " 49.666667\n", + " 39.333333\n", + " 45.500000\n", + " 36.166667\n", + " 297.666667\n", " \n", " \n", " 3\n", " T001\n", - " 2013-01-10 00:30:00\n", - " 257.0\n", - " 307.0\n", - " 315.0\n", - " 361.0\n", - " 317.0\n", - " 354.0\n", - " 271.0\n", - " 3198011.0\n", + " 2013-01-10 03:00:00\n", + " 253.166667\n", + " 256.166667\n", + " 
242.666667\n", + " 265.333333\n", + " 211.666667\n", + " 226.666667\n", + " 181.000000\n", + " 3.198691e+06\n", " ...\n", - " 10.5\n", - " 3132179.0\n", - " 43.0\n", - " 51.0\n", - " 53.0\n", - " 62.0\n", - " 53.0\n", - " 60.0\n", - " 45.0\n", - " 357.0\n", + " 8.433333\n", + " 3.137001e+06\n", + " 42.333333\n", + " 42.833333\n", + " 40.500000\n", + " 44.166667\n", + " 35.333333\n", + " 37.833333\n", + " 30.333333\n", + " 268.000000\n", " \n", " \n", " 4\n", " T001\n", - " 2013-01-10 00:40:00\n", - " 267.0\n", - " 309.0\n", - " 314.0\n", - " 355.0\n", - " 262.0\n", - " 246.0\n", - " 212.0\n", - " 3198056.0\n", + " 2013-01-10 04:00:00\n", + " 305.000000\n", + " 312.333333\n", + " 346.166667\n", + " 329.833333\n", + " 280.666667\n", + " 308.833333\n", + " 271.833333\n", + " 3.198978e+06\n", " ...\n", - " 9.6\n", - " 3132501.0\n", - " 45.0\n", - " 51.0\n", - " 54.0\n", - " 59.0\n", - " 43.0\n", - " 41.0\n", - " 36.0\n", - " 322.0\n", + " 9.083333\n", + " 3.138843e+06\n", + " 50.500000\n", + " 51.166667\n", + " 55.500000\n", + " 53.666667\n", + " 46.166667\n", + " 49.666667\n", + " 41.166667\n", + " 341.833333\n", " \n", " \n", "\n", @@ -491,38 +608,38 @@ "" ], "text/plain": [ - " turbine_id timestamp value_S01 value_S02 value_S03 value_S04 \\\n", - "0 T001 2013-01-10 00:00:00 323.0 320.0 284.0 348.0 \n", - "1 T001 2013-01-10 00:10:00 346.0 384.0 367.0 411.0 \n", - "2 T001 2013-01-10 00:20:00 407.0 363.0 407.0 393.0 \n", - "3 T001 2013-01-10 00:30:00 257.0 307.0 315.0 361.0 \n", - "4 T001 2013-01-10 00:40:00 267.0 309.0 314.0 355.0 \n", + " turbine_id timestamp value_S01 value_S02 value_S03 \\\n", + "0 T001 2013-01-10 00:00:00 313.333333 323.833333 336.000000 \n", + "1 T001 2013-01-10 01:00:00 197.500000 221.333333 216.000000 \n", + "2 T001 2013-01-10 02:00:00 248.166667 271.666667 277.500000 \n", + "3 T001 2013-01-10 03:00:00 253.166667 256.166667 242.666667 \n", + "4 T001 2013-01-10 04:00:00 305.000000 312.333333 346.166667 \n", "\n", - " value_S05 value_S06 value_S07 value_S08 ... value_S17 value_S18 \\\n", - "0 273.0 342.0 280.0 3197842.0 ... 11.7 3131020.0 \n", - "1 331.0 360.0 249.0 3197900.0 ... 10.2 3131420.0 \n", - "2 275.0 335.0 270.0 3197968.0 ... 9.5 3131822.0 \n", - "3 317.0 354.0 271.0 3198011.0 ... 10.5 3132179.0 \n", - "4 262.0 246.0 212.0 3198056.0 ... 9.6 3132501.0 \n", + " value_S04 value_S05 value_S06 value_S07 value_S08 ... \\\n", + "0 364.666667 286.500000 314.000000 243.166667 3.197980e+06 ... \n", + "1 260.666667 206.833333 235.833333 186.666667 3.198221e+06 ... \n", + "2 298.000000 233.666667 271.166667 216.333333 3.198448e+06 ... \n", + "3 265.333333 211.666667 226.666667 181.000000 3.198691e+06 ... \n", + "4 329.833333 280.666667 308.833333 271.833333 3.198978e+06 ... 
\n", "\n", - " value_S19 value_S20 value_S21 value_S22 value_S23 value_S24 \\\n", - "0 55.0 55.0 47.0 58.0 45.0 58.0 \n", - "1 58.0 63.0 62.0 67.0 55.0 61.0 \n", - "2 68.0 61.0 67.0 66.0 46.0 55.0 \n", - "3 43.0 51.0 53.0 62.0 53.0 60.0 \n", - "4 45.0 51.0 54.0 59.0 43.0 41.0 \n", + " value_S17 value_S18 value_S19 value_S20 value_S21 value_S22 \\\n", + "0 10.383333 3.131958e+06 52.666667 54.333333 56.166667 61.000000 \n", + "1 8.666667 3.133668e+06 33.166667 37.000000 36.166667 43.666667 \n", + "2 8.833333 3.135413e+06 41.500000 45.666667 46.500000 49.666667 \n", + "3 8.433333 3.137001e+06 42.333333 42.833333 40.500000 44.166667 \n", + "4 9.083333 3.138843e+06 50.500000 51.166667 55.500000 53.666667 \n", "\n", - " value_S25 value_S26 \n", - "0 47.0 356.0 \n", - "1 42.0 400.0 \n", - "2 45.0 402.0 \n", - "3 45.0 357.0 \n", - "4 36.0 322.0 \n", + " value_S23 value_S24 value_S25 value_S26 \n", + "0 47.666667 52.666667 40.833333 357.333333 \n", + "1 34.500000 39.333333 31.166667 249.666667 \n", + "2 39.333333 45.500000 36.166667 297.666667 \n", + "3 35.333333 37.833333 30.333333 268.000000 \n", + "4 46.166667 49.666667 41.166667 341.833333 \n", "\n", "[5 rows x 28 columns]" ] }, - "execution_count": 9, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -544,17 +661,17 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ - "step = 1\n", + "step = 2\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -563,7 +680,7 @@ "dict_keys(['readings', 'turbines', 'X', 'y', 'turbine_id'])" ] }, - "execution_count": 11, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -574,7 +691,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -588,7 +705,7 @@ "Name: turbine_id, dtype: object" ] }, - "execution_count": 12, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -599,7 +716,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -650,122 +767,122 @@ " \n", " 0\n", " 2013-01-10 00:00:00\n", - " 323.0\n", - " 320.0\n", - " 284.0\n", - " 348.0\n", - " 273.0\n", - " 342.0\n", - " 280.0\n", - " 3197842.0\n", - " 695000.0\n", + " 313.333333\n", + " 323.833333\n", + " 336.000000\n", + " 364.666667\n", + " 286.500000\n", + " 314.000000\n", + " 243.166667\n", + " 3.197980e+06\n", + " 695143.166667\n", " ...\n", - " 11.7\n", - " 3131020.0\n", - " 55.0\n", - " 55.0\n", - " 47.0\n", - " 58.0\n", - " 45.0\n", - " 58.0\n", - " 47.0\n", - " 356.0\n", + " 10.383333\n", + " 3.131958e+06\n", + " 52.666667\n", + " 54.333333\n", + " 56.166667\n", + " 61.000000\n", + " 47.666667\n", + " 52.666667\n", + " 40.833333\n", + " 357.333333\n", " \n", " \n", " 1\n", - " 2013-01-10 00:10:00\n", - " 346.0\n", - " 384.0\n", - " 367.0\n", - " 411.0\n", - " 331.0\n", - " 360.0\n", - " 249.0\n", - " 3197900.0\n", - " 695063.0\n", + " 2013-01-10 01:00:00\n", + " 197.500000\n", + " 221.333333\n", + " 216.000000\n", + " 260.666667\n", + " 206.833333\n", + " 235.833333\n", + " 186.666667\n", + " 3.198221e+06\n", + " 695403.666667\n", " ...\n", - " 10.2\n", - " 3131420.0\n", - " 58.0\n", - " 63.0\n", - " 62.0\n", - " 67.0\n", - " 55.0\n", - " 61.0\n", - " 42.0\n", - " 400.0\n", + " 8.666667\n", + " 3.133668e+06\n", + " 33.166667\n", + " 37.000000\n", + " 
36.166667\n", + " 43.666667\n", + " 34.500000\n", + " 39.333333\n", + " 31.166667\n", + " 249.666667\n", " \n", " \n", " 2\n", - " 2013-01-10 00:20:00\n", - " 407.0\n", - " 363.0\n", - " 407.0\n", - " 393.0\n", - " 275.0\n", - " 335.0\n", - " 270.0\n", - " 3197968.0\n", - " 695124.0\n", + " 2013-01-10 02:00:00\n", + " 248.166667\n", + " 271.666667\n", + " 277.500000\n", + " 298.000000\n", + " 233.666667\n", + " 271.166667\n", + " 216.333333\n", + " 3.198448e+06\n", + " 695656.500000\n", " ...\n", - " 9.5\n", - " 3131822.0\n", - " 68.0\n", - " 61.0\n", - " 67.0\n", - " 66.0\n", - " 46.0\n", - " 55.0\n", - " 45.0\n", - " 402.0\n", + " 8.833333\n", + " 3.135413e+06\n", + " 41.500000\n", + " 45.666667\n", + " 46.500000\n", + " 49.666667\n", + " 39.333333\n", + " 45.500000\n", + " 36.166667\n", + " 297.666667\n", " \n", " \n", " 3\n", - " 2013-01-10 00:30:00\n", - " 257.0\n", - " 307.0\n", - " 315.0\n", - " 361.0\n", - " 317.0\n", - " 354.0\n", - " 271.0\n", - " 3198011.0\n", - " 695175.0\n", + " 2013-01-10 03:00:00\n", + " 253.166667\n", + " 256.166667\n", + " 242.666667\n", + " 265.333333\n", + " 211.666667\n", + " 226.666667\n", + " 181.000000\n", + " 3.198691e+06\n", + " 695911.333333\n", " ...\n", - " 10.5\n", - " 3132179.0\n", - " 43.0\n", - " 51.0\n", - " 53.0\n", - " 62.0\n", - " 53.0\n", - " 60.0\n", - " 45.0\n", - " 357.0\n", + " 8.433333\n", + " 3.137001e+06\n", + " 42.333333\n", + " 42.833333\n", + " 40.500000\n", + " 44.166667\n", + " 35.333333\n", + " 37.833333\n", + " 30.333333\n", + " 268.000000\n", " \n", " \n", " 4\n", - " 2013-01-10 00:40:00\n", - " 267.0\n", - " 309.0\n", - " 314.0\n", - " 355.0\n", - " 262.0\n", - " 246.0\n", - " 212.0\n", - " 3198056.0\n", - " 695226.0\n", + " 2013-01-10 04:00:00\n", + " 305.000000\n", + " 312.333333\n", + " 346.166667\n", + " 329.833333\n", + " 280.666667\n", + " 308.833333\n", + " 271.833333\n", + " 3.198978e+06\n", + " 696195.833333\n", " ...\n", - " 9.6\n", - " 3132501.0\n", - " 45.0\n", - " 51.0\n", - " 54.0\n", - " 59.0\n", - " 43.0\n", - " 41.0\n", - " 36.0\n", - " 322.0\n", + " 9.083333\n", + " 3.138843e+06\n", + " 50.500000\n", + " 51.166667\n", + " 55.500000\n", + " 53.666667\n", + " 46.166667\n", + " 49.666667\n", + " 41.166667\n", + " 341.833333\n", " \n", " \n", "\n", @@ -773,38 +890,38 @@ "" ], "text/plain": [ - " timestamp value_S01 value_S02 value_S03 value_S04 value_S05 \\\n", - "0 2013-01-10 00:00:00 323.0 320.0 284.0 348.0 273.0 \n", - "1 2013-01-10 00:10:00 346.0 384.0 367.0 411.0 331.0 \n", - "2 2013-01-10 00:20:00 407.0 363.0 407.0 393.0 275.0 \n", - "3 2013-01-10 00:30:00 257.0 307.0 315.0 361.0 317.0 \n", - "4 2013-01-10 00:40:00 267.0 309.0 314.0 355.0 262.0 \n", + " timestamp value_S01 value_S02 value_S03 value_S04 \\\n", + "0 2013-01-10 00:00:00 313.333333 323.833333 336.000000 364.666667 \n", + "1 2013-01-10 01:00:00 197.500000 221.333333 216.000000 260.666667 \n", + "2 2013-01-10 02:00:00 248.166667 271.666667 277.500000 298.000000 \n", + "3 2013-01-10 03:00:00 253.166667 256.166667 242.666667 265.333333 \n", + "4 2013-01-10 04:00:00 305.000000 312.333333 346.166667 329.833333 \n", "\n", - " value_S06 value_S07 value_S08 value_S09 ... value_S17 value_S18 \\\n", - "0 342.0 280.0 3197842.0 695000.0 ... 11.7 3131020.0 \n", - "1 360.0 249.0 3197900.0 695063.0 ... 10.2 3131420.0 \n", - "2 335.0 270.0 3197968.0 695124.0 ... 9.5 3131822.0 \n", - "3 354.0 271.0 3198011.0 695175.0 ... 10.5 3132179.0 \n", - "4 246.0 212.0 3198056.0 695226.0 ... 9.6 3132501.0 \n", + " value_S05 value_S06 value_S07 value_S08 value_S09 ... 
\\\n", + "0 286.500000 314.000000 243.166667 3.197980e+06 695143.166667 ... \n", + "1 206.833333 235.833333 186.666667 3.198221e+06 695403.666667 ... \n", + "2 233.666667 271.166667 216.333333 3.198448e+06 695656.500000 ... \n", + "3 211.666667 226.666667 181.000000 3.198691e+06 695911.333333 ... \n", + "4 280.666667 308.833333 271.833333 3.198978e+06 696195.833333 ... \n", "\n", - " value_S19 value_S20 value_S21 value_S22 value_S23 value_S24 \\\n", - "0 55.0 55.0 47.0 58.0 45.0 58.0 \n", - "1 58.0 63.0 62.0 67.0 55.0 61.0 \n", - "2 68.0 61.0 67.0 66.0 46.0 55.0 \n", - "3 43.0 51.0 53.0 62.0 53.0 60.0 \n", - "4 45.0 51.0 54.0 59.0 43.0 41.0 \n", + " value_S17 value_S18 value_S19 value_S20 value_S21 value_S22 \\\n", + "0 10.383333 3.131958e+06 52.666667 54.333333 56.166667 61.000000 \n", + "1 8.666667 3.133668e+06 33.166667 37.000000 36.166667 43.666667 \n", + "2 8.833333 3.135413e+06 41.500000 45.666667 46.500000 49.666667 \n", + "3 8.433333 3.137001e+06 42.333333 42.833333 40.500000 44.166667 \n", + "4 9.083333 3.138843e+06 50.500000 51.166667 55.500000 53.666667 \n", "\n", - " value_S25 value_S26 \n", - "0 47.0 356.0 \n", - "1 42.0 400.0 \n", - "2 45.0 402.0 \n", - "3 45.0 357.0 \n", - "4 36.0 322.0 \n", + " value_S23 value_S24 value_S25 value_S26 \n", + "0 47.666667 52.666667 40.833333 357.333333 \n", + "1 34.500000 39.333333 31.166667 249.666667 \n", + "2 39.333333 45.500000 36.166667 297.666667 \n", + "3 35.333333 37.833333 30.333333 268.000000 \n", + "4 46.166667 49.666667 41.166667 341.833333 \n", "\n", "[5 rows x 27 columns]" ] }, - "execution_count": 13, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -826,17 +943,17 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ - "step = 2\n", + "step = 3\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -845,7 +962,7 @@ "dict_keys(['readings', 'turbines', 'turbine_id', 'X', 'y', 'timestamp'])" ] }, - "execution_count": 15, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -856,21 +973,21 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 2013-01-10 00:00:00\n", - "1 2013-01-10 00:10:00\n", - "2 2013-01-10 00:20:00\n", - "3 2013-01-10 00:30:00\n", - "4 2013-01-10 00:40:00\n", + "1 2013-01-10 01:00:00\n", + "2 2013-01-10 02:00:00\n", + "3 2013-01-10 03:00:00\n", + "4 2013-01-10 04:00:00\n", "Name: timestamp, dtype: datetime64[ns]" ] }, - "execution_count": 16, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -881,7 +998,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -931,123 +1048,123 @@ " \n", " \n", " 0\n", - " 323.0\n", - " 320.0\n", - " 284.0\n", - " 348.0\n", - " 273.0\n", - " 342.0\n", - " 280.0\n", - " 3197842.0\n", - " 695000.0\n", - " 3348234.0\n", + " 313.333333\n", + " 323.833333\n", + " 336.000000\n", + " 364.666667\n", + " 286.500000\n", + " 314.000000\n", + " 243.166667\n", + " 3.197980e+06\n", + " 695143.166667\n", + " 3.348384e+06\n", " ...\n", - " 11.7\n", - " 3131020.0\n", - " 55.0\n", - " 55.0\n", - " 47.0\n", - " 58.0\n", - " 45.0\n", - " 58.0\n", - " 47.0\n", - " 356.0\n", + " 10.383333\n", + " 3.131958e+06\n", + " 52.666667\n", + " 54.333333\n", + " 56.166667\n", + " 61.000000\n", + " 
47.666667\n", + " 52.666667\n", + " 40.833333\n", + " 357.333333\n", " \n", " \n", " 1\n", - " 346.0\n", - " 384.0\n", - " 367.0\n", - " 411.0\n", - " 331.0\n", - " 360.0\n", - " 249.0\n", - " 3197900.0\n", - " 695063.0\n", - " 3348296.0\n", + " 197.500000\n", + " 221.333333\n", + " 216.000000\n", + " 260.666667\n", + " 206.833333\n", + " 235.833333\n", + " 186.666667\n", + " 3.198221e+06\n", + " 695403.666667\n", + " 3.348651e+06\n", " ...\n", - " 10.2\n", - " 3131420.0\n", - " 58.0\n", - " 63.0\n", - " 62.0\n", - " 67.0\n", - " 55.0\n", - " 61.0\n", - " 42.0\n", - " 400.0\n", + " 8.666667\n", + " 3.133668e+06\n", + " 33.166667\n", + " 37.000000\n", + " 36.166667\n", + " 43.666667\n", + " 34.500000\n", + " 39.333333\n", + " 31.166667\n", + " 249.666667\n", " \n", " \n", " 2\n", - " 407.0\n", - " 363.0\n", - " 407.0\n", - " 393.0\n", - " 275.0\n", - " 335.0\n", - " 270.0\n", - " 3197968.0\n", - " 695124.0\n", - " 3348363.0\n", + " 248.166667\n", + " 271.666667\n", + " 277.500000\n", + " 298.000000\n", + " 233.666667\n", + " 271.166667\n", + " 216.333333\n", + " 3.198448e+06\n", + " 695656.500000\n", + " 3.348910e+06\n", " ...\n", - " 9.5\n", - " 3131822.0\n", - " 68.0\n", - " 61.0\n", - " 67.0\n", - " 66.0\n", - " 46.0\n", - " 55.0\n", - " 45.0\n", - " 402.0\n", + " 8.833333\n", + " 3.135413e+06\n", + " 41.500000\n", + " 45.666667\n", + " 46.500000\n", + " 49.666667\n", + " 39.333333\n", + " 45.500000\n", + " 36.166667\n", + " 297.666667\n", " \n", " \n", " 3\n", - " 257.0\n", - " 307.0\n", - " 315.0\n", - " 361.0\n", - " 317.0\n", - " 354.0\n", - " 271.0\n", - " 3198011.0\n", - " 695175.0\n", - " 3348416.0\n", + " 253.166667\n", + " 256.166667\n", + " 242.666667\n", + " 265.333333\n", + " 211.666667\n", + " 226.666667\n", + " 181.000000\n", + " 3.198691e+06\n", + " 695911.333333\n", + " 3.349157e+06\n", " ...\n", - " 10.5\n", - " 3132179.0\n", - " 43.0\n", - " 51.0\n", - " 53.0\n", - " 62.0\n", - " 53.0\n", - " 60.0\n", - " 45.0\n", - " 357.0\n", + " 8.433333\n", + " 3.137001e+06\n", + " 42.333333\n", + " 42.833333\n", + " 40.500000\n", + " 44.166667\n", + " 35.333333\n", + " 37.833333\n", + " 30.333333\n", + " 268.000000\n", " \n", " \n", " 4\n", - " 267.0\n", - " 309.0\n", - " 314.0\n", - " 355.0\n", - " 262.0\n", - " 246.0\n", - " 212.0\n", - " 3198056.0\n", - " 695226.0\n", - " 3348470.0\n", + " 305.000000\n", + " 312.333333\n", + " 346.166667\n", + " 329.833333\n", + " 280.666667\n", + " 308.833333\n", + " 271.833333\n", + " 3.198978e+06\n", + " 696195.833333\n", + " 3.349452e+06\n", " ...\n", - " 9.6\n", - " 3132501.0\n", - " 45.0\n", - " 51.0\n", - " 54.0\n", - " 59.0\n", - " 43.0\n", - " 41.0\n", - " 36.0\n", - " 322.0\n", + " 9.083333\n", + " 3.138843e+06\n", + " 50.500000\n", + " 51.166667\n", + " 55.500000\n", + " 53.666667\n", + " 46.166667\n", + " 49.666667\n", + " 41.166667\n", + " 341.833333\n", " \n", " \n", "\n", @@ -1055,38 +1172,38 @@ "" ], "text/plain": [ - " value_S01 value_S02 value_S03 value_S04 value_S05 value_S06 \\\n", - "0 323.0 320.0 284.0 348.0 273.0 342.0 \n", - "1 346.0 384.0 367.0 411.0 331.0 360.0 \n", - "2 407.0 363.0 407.0 393.0 275.0 335.0 \n", - "3 257.0 307.0 315.0 361.0 317.0 354.0 \n", - "4 267.0 309.0 314.0 355.0 262.0 246.0 \n", + " value_S01 value_S02 value_S03 value_S04 value_S05 value_S06 \\\n", + "0 313.333333 323.833333 336.000000 364.666667 286.500000 314.000000 \n", + "1 197.500000 221.333333 216.000000 260.666667 206.833333 235.833333 \n", + "2 248.166667 271.666667 277.500000 298.000000 233.666667 271.166667 \n", + "3 253.166667 
256.166667 242.666667 265.333333 211.666667 226.666667 \n", + "4 305.000000 312.333333 346.166667 329.833333 280.666667 308.833333 \n", "\n", - " value_S07 value_S08 value_S09 value_S10 ... value_S17 value_S18 \\\n", - "0 280.0 3197842.0 695000.0 3348234.0 ... 11.7 3131020.0 \n", - "1 249.0 3197900.0 695063.0 3348296.0 ... 10.2 3131420.0 \n", - "2 270.0 3197968.0 695124.0 3348363.0 ... 9.5 3131822.0 \n", - "3 271.0 3198011.0 695175.0 3348416.0 ... 10.5 3132179.0 \n", - "4 212.0 3198056.0 695226.0 3348470.0 ... 9.6 3132501.0 \n", + " value_S07 value_S08 value_S09 value_S10 ... value_S17 \\\n", + "0 243.166667 3.197980e+06 695143.166667 3.348384e+06 ... 10.383333 \n", + "1 186.666667 3.198221e+06 695403.666667 3.348651e+06 ... 8.666667 \n", + "2 216.333333 3.198448e+06 695656.500000 3.348910e+06 ... 8.833333 \n", + "3 181.000000 3.198691e+06 695911.333333 3.349157e+06 ... 8.433333 \n", + "4 271.833333 3.198978e+06 696195.833333 3.349452e+06 ... 9.083333 \n", "\n", - " value_S19 value_S20 value_S21 value_S22 value_S23 value_S24 \\\n", - "0 55.0 55.0 47.0 58.0 45.0 58.0 \n", - "1 58.0 63.0 62.0 67.0 55.0 61.0 \n", - "2 68.0 61.0 67.0 66.0 46.0 55.0 \n", - "3 43.0 51.0 53.0 62.0 53.0 60.0 \n", - "4 45.0 51.0 54.0 59.0 43.0 41.0 \n", + " value_S18 value_S19 value_S20 value_S21 value_S22 value_S23 \\\n", + "0 3.131958e+06 52.666667 54.333333 56.166667 61.000000 47.666667 \n", + "1 3.133668e+06 33.166667 37.000000 36.166667 43.666667 34.500000 \n", + "2 3.135413e+06 41.500000 45.666667 46.500000 49.666667 39.333333 \n", + "3 3.137001e+06 42.333333 42.833333 40.500000 44.166667 35.333333 \n", + "4 3.138843e+06 50.500000 51.166667 55.500000 53.666667 46.166667 \n", "\n", - " value_S25 value_S26 \n", - "0 47.0 356.0 \n", - "1 42.0 400.0 \n", - "2 45.0 402.0 \n", - "3 45.0 357.0 \n", - "4 36.0 322.0 \n", + " value_S24 value_S25 value_S26 \n", + "0 52.666667 40.833333 357.333333 \n", + "1 39.333333 31.166667 249.666667 \n", + "2 45.500000 36.166667 297.666667 \n", + "3 37.833333 30.333333 268.000000 \n", + "4 49.666667 41.166667 341.833333 \n", "\n", "[5 rows x 26 columns]" ] }, - "execution_count": 17, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -1108,17 +1225,26 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 21, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/sarah/anaconda3/envs/draco/lib/python3.8/site-packages/sklearn/impute/_base.py:356: FutureWarning: The 'verbose' parameter was deprecated in version 1.1 and will be removed in 1.3. 
A warning will always be raised upon the removal of empty columns in the future version.\n", + " warnings.warn(\n" + ] + } + ], "source": [ - "step = 3\n", + "step = 4\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -1127,7 +1253,7 @@ "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" ] }, - "execution_count": 19, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -1138,50 +1264,50 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([[3.230000e+02, 3.200000e+02, 2.840000e+02, 3.480000e+02,\n", - " 2.730000e+02, 3.420000e+02, 2.800000e+02, 3.197842e+06,\n", - " 6.950000e+05, 3.348234e+06, 3.436762e+06, 3.322362e+06,\n", - " 3.357952e+06, 3.223797e+06, 8.300000e+00, 6.000000e+00,\n", - " 1.170000e+01, 3.131020e+06, 5.500000e+01, 5.500000e+01,\n", - " 4.700000e+01, 5.800000e+01, 4.500000e+01, 5.800000e+01,\n", - " 4.700000e+01, 3.560000e+02],\n", - " [3.460000e+02, 3.840000e+02, 3.670000e+02, 4.110000e+02,\n", - " 3.310000e+02, 3.600000e+02, 2.490000e+02, 3.197900e+06,\n", - " 6.950630e+05, 3.348296e+06, 3.436829e+06, 3.322417e+06,\n", - " 3.358013e+06, 3.223839e+06, 7.600000e+00, 5.000000e+00,\n", - " 1.020000e+01, 3.131420e+06, 5.800000e+01, 6.300000e+01,\n", - " 6.200000e+01, 6.700000e+01, 5.500000e+01, 6.100000e+01,\n", - " 4.200000e+01, 4.000000e+02],\n", - " [4.070000e+02, 3.630000e+02, 4.070000e+02, 3.930000e+02,\n", - " 2.750000e+02, 3.350000e+02, 2.700000e+02, 3.197968e+06,\n", - " 6.951240e+05, 3.348363e+06, 3.436895e+06, 3.322463e+06,\n", - " 3.358068e+06, 3.223884e+06, 7.800000e+00, 5.700000e+00,\n", - " 9.500000e+00, 3.131822e+06, 6.800000e+01, 6.100000e+01,\n", - " 6.700000e+01, 6.600000e+01, 4.600000e+01, 5.500000e+01,\n", - " 4.500000e+01, 4.020000e+02],\n", - " [2.570000e+02, 3.070000e+02, 3.150000e+02, 3.610000e+02,\n", - " 3.170000e+02, 3.540000e+02, 2.710000e+02, 3.198011e+06,\n", - " 6.951750e+05, 3.348416e+06, 3.436957e+06, 3.322516e+06,\n", - " 3.358128e+06, 3.223929e+06, 8.600000e+00, 6.600000e+00,\n", - " 1.050000e+01, 3.132179e+06, 4.300000e+01, 5.100000e+01,\n", - " 5.300000e+01, 6.200000e+01, 5.300000e+01, 6.000000e+01,\n", - " 4.500000e+01, 3.570000e+02],\n", - " [2.670000e+02, 3.090000e+02, 3.140000e+02, 3.550000e+02,\n", - " 2.620000e+02, 2.460000e+02, 2.120000e+02, 3.198056e+06,\n", - " 6.952260e+05, 3.348470e+06, 3.437016e+06, 3.322559e+06,\n", - " 3.358169e+06, 3.223965e+06, 7.500000e+00, 5.900000e+00,\n", - " 9.600000e+00, 3.132501e+06, 4.500000e+01, 5.100000e+01,\n", - " 5.400000e+01, 5.900000e+01, 4.300000e+01, 4.100000e+01,\n", - " 3.600000e+01, 3.220000e+02]])" + "array([[3.13333333e+02, 3.23833333e+02, 3.36000000e+02, 3.64666667e+02,\n", + " 2.86500000e+02, 3.14000000e+02, 2.43166667e+02, 3.19798000e+06,\n", + " 6.95143167e+05, 3.34838383e+06, 3.43692150e+06, 3.32248667e+06,\n", + " 3.35809000e+06, 3.22390150e+06, 7.95000000e+00, 5.85000000e+00,\n", + " 1.03833333e+01, 3.13195833e+06, 5.26666667e+01, 5.43333333e+01,\n", + " 5.61666667e+01, 6.10000000e+01, 4.76666667e+01, 5.26666667e+01,\n", + " 4.08333333e+01, 3.57333333e+02],\n", + " [1.97500000e+02, 2.21333333e+02, 2.16000000e+02, 2.60666667e+02,\n", + " 2.06833333e+02, 2.35833333e+02, 1.86666667e+02, 3.19822067e+06,\n", + " 6.95403667e+05, 3.34865117e+06, 3.43722283e+06, 3.32272200e+06,\n", + " 3.35834000e+06, 3.22409567e+06, 
6.83333333e+00, 5.15000000e+00,\n", + " 8.66666667e+00, 3.13366817e+06, 3.31666667e+01, 3.70000000e+01,\n", + " 3.61666667e+01, 4.36666667e+01, 3.45000000e+01, 3.93333333e+01,\n", + " 3.11666667e+01, 2.49666667e+02],\n", + " [2.48166667e+02, 2.71666667e+02, 2.77500000e+02, 2.98000000e+02,\n", + " 2.33666667e+02, 2.71166667e+02, 2.16333333e+02, 3.19844767e+06,\n", + " 6.95656500e+05, 3.34890967e+06, 3.43751900e+06, 3.32295950e+06,\n", + " 3.35862067e+06, 3.22432333e+06, 7.11666667e+00, 5.56666667e+00,\n", + " 8.83333333e+00, 3.13541283e+06, 4.15000000e+01, 4.56666667e+01,\n", + " 4.65000000e+01, 4.96666667e+01, 3.93333333e+01, 4.55000000e+01,\n", + " 3.61666667e+01, 2.97666667e+02],\n", + " [2.53166667e+02, 2.56166667e+02, 2.42666667e+02, 2.65333333e+02,\n", + " 2.11666667e+02, 2.26666667e+02, 1.81000000e+02, 3.19869117e+06,\n", + " 6.95911333e+05, 3.34915717e+06, 3.43778050e+06, 3.32316850e+06,\n", + " 3.35884883e+06, 3.22450217e+06, 6.71666667e+00, 5.16666667e+00,\n", + " 8.43333333e+00, 3.13700133e+06, 4.23333333e+01, 4.28333333e+01,\n", + " 4.05000000e+01, 4.41666667e+01, 3.53333333e+01, 3.78333333e+01,\n", + " 3.03333333e+01, 2.68000000e+02],\n", + " [3.05000000e+02, 3.12333333e+02, 3.46166667e+02, 3.29833333e+02,\n", + " 2.80666667e+02, 3.08833333e+02, 2.71833333e+02, 3.19897850e+06,\n", + " 6.96195833e+05, 3.34945200e+06, 3.43807767e+06, 3.32340933e+06,\n", + " 3.35910983e+06, 3.22471400e+06, 7.20000000e+00, 5.28333333e+00,\n", + " 9.08333333e+00, 3.13884333e+06, 5.05000000e+01, 5.11666667e+01,\n", + " 5.55000000e+01, 5.36666667e+01, 4.61666667e+01, 4.96666667e+01,\n", + " 4.11666667e+01, 3.41833333e+02]])" ] }, - "execution_count": 20, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -1203,17 +1329,17 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ - "step = 4\n", + "step = 5\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -1222,7 +1348,7 @@ "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" ] }, - "execution_count": 22, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -1233,45 +1359,45 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([[-0.23563892, -0.24267292, -0.3286385 , -0.17702227, -0.35287222,\n", - " -0.19248826, -0.3317757 , -1. , -1. , -1. ,\n", - " -1. , -1. , -1. , -1. 
, -0.11702128,\n", - " -0.24050633, -0.25714286, -0.37378787, -0.22758621, -0.22758621,\n", - " -0.31972789, -0.1862069 , -0.36986301, -0.1862069 , -0.33793103,\n", - " -0.26141079],\n", - " [-0.18171161, -0.0926143 , -0.13380282, -0.02930832, -0.21688159,\n", - " -0.15023474, -0.40420561, -0.99995911, -0.99995779, -0.99995941,\n", - " -0.99995718, -0.99996326, -0.99996042, -0.99997164, -0.19148936,\n", - " -0.36708861, -0.35238095, -0.37370786, -0.1862069 , -0.11724138,\n", - " -0.11564626, -0.06206897, -0.23287671, -0.14482759, -0.40689655,\n", - " -0.17012448],\n", - " [-0.03868699, -0.14185229, -0.0399061 , -0.07151231, -0.34818288,\n", - " -0.20892019, -0.35514019, -0.99991116, -0.99991693, -0.99991555,\n", - " -0.999915 , -0.99993254, -0.99992474, -0.99994125, -0.17021277,\n", - " -0.27848101, -0.3968254 , -0.37362746, -0.04827586, -0.14482759,\n", - " -0.04761905, -0.07586207, -0.35616438, -0.22758621, -0.36551724,\n", - " -0.1659751 ],\n", - " [-0.39038687, -0.27315358, -0.25586854, -0.14654162, -0.24970692,\n", - " -0.16431925, -0.35280374, -0.99988085, -0.99988276, -0.99988086,\n", - " -0.99987538, -0.99989714, -0.99988581, -0.99991086, -0.08510638,\n", - " -0.16455696, -0.33333333, -0.37355606, -0.39310345, -0.28275862,\n", - " -0.23809524, -0.13103448, -0.26027397, -0.15862069, -0.36551724,\n", - " -0.2593361 ],\n", - " [-0.36694021, -0.26846424, -0.25821596, -0.16060961, -0.37866354,\n", - " -0.41784038, -0.49065421, -0.99984912, -0.99984859, -0.99984551,\n", - " -0.99983767, -0.99986841, -0.99985921, -0.99988655, -0.20212766,\n", - " -0.25316456, -0.39047619, -0.37349166, -0.36551724, -0.28275862,\n", - " -0.2244898 , -0.17241379, -0.39726027, -0.42068966, -0.48965517,\n", - " -0.33195021]])" + "array([[-0.26126126, -0.23706897, -0.20870076, -0.14106583, -0.32328767,\n", + " -0.25969448, -0.42198789, -1. , -1. , -1. ,\n", + " -1. , -1. , -1. , -1. 
, -0.11007463,\n", + " -0.16824645, -0.10424155, -0.37397741, -0.25233645, -0.22716628,\n", + " -0.20140515, -0.13481829, -0.32239156, -0.25380117, -0.4182243 ,\n", + " -0.25697453],\n", + " [-0.53349001, -0.47805643, -0.49088771, -0.38557994, -0.51037182,\n", + " -0.44339992, -0.55438391, -0.99983031, -0.99982547, -0.99982499,\n", + " -0.99980741, -0.9998428 , -0.99983779, -0.99986887, -0.23507463,\n", + " -0.26777251, -0.25233645, -0.37363511, -0.52570093, -0.470726 ,\n", + " -0.4824356 , -0.37866354, -0.50762016, -0.44093567, -0.55373832,\n", + " -0.48085254],\n", + " [-0.41441441, -0.35971787, -0.3462669 , -0.29780564, -0.44735812,\n", + " -0.36036036, -0.48486624, -0.99967026, -0.99965608, -0.99965576,\n", + " -0.99961813, -0.99968416, -0.99965569, -0.99971512, -0.20335821,\n", + " -0.20853081, -0.2379583 , -0.37328583, -0.4088785 , -0.34894614,\n", + " -0.33723653, -0.29425557, -0.43962485, -0.35438596, -0.48364486,\n", + " -0.38104315],\n", + " [-0.40266353, -0.39615987, -0.4281795 , -0.37460815, -0.49902153,\n", + " -0.4649432 , -0.56766257, -0.99949857, -0.99948535, -0.99949373,\n", + " -0.999451 , -0.99954455, -0.99950765, -0.99959435, -0.24813433,\n", + " -0.26540284, -0.27246585, -0.37296782, -0.39719626, -0.38875878,\n", + " -0.42154567, -0.37162954, -0.49589683, -0.4619883 , -0.56542056,\n", + " -0.4427309 ],\n", + " [-0.28084606, -0.26410658, -0.18479326, -0.22296238, -0.3369863 ,\n", + " -0.27183705, -0.35481351, -0.99929598, -0.99929474, -0.99930071,\n", + " -0.99926107, -0.99938368, -0.99933831, -0.9994513 , -0.19402985,\n", + " -0.24881517, -0.21639109, -0.37259906, -0.28271028, -0.27166276,\n", + " -0.21077283, -0.23798359, -0.34349355, -0.29590643, -0.4135514 ,\n", + " -0.28920464]])" ] }, - "execution_count": 23, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -1293,17 +1419,17 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ - "step = 5\n", + "step = 6\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -1312,7 +1438,7 @@ "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" ] }, - "execution_count": 25, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -1323,7 +1449,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 29, "metadata": {}, "outputs": [ { @@ -1373,123 +1499,123 @@ " \n", " \n", " 0\n", - " -0.235639\n", - " -0.242673\n", - " -0.328638\n", - " -0.177022\n", - " -0.352872\n", - " -0.192488\n", - " -0.331776\n", + " -0.261261\n", + " -0.237069\n", + " -0.208701\n", + " -0.141066\n", + " -0.323288\n", + " -0.259694\n", + " -0.421988\n", " -1.000000\n", " -1.000000\n", " -1.000000\n", " ...\n", - " -0.257143\n", - " -0.373788\n", - " -0.227586\n", - " -0.227586\n", - " -0.319728\n", - " -0.186207\n", - " -0.369863\n", - " -0.186207\n", - " -0.337931\n", - " -0.261411\n", + " -0.104242\n", + " -0.373977\n", + " -0.252336\n", + " -0.227166\n", + " -0.201405\n", + " -0.134818\n", + " -0.322392\n", + " -0.253801\n", + " -0.418224\n", + " -0.256975\n", " \n", " \n", " 1\n", - " -0.181712\n", - " -0.092614\n", - " -0.133803\n", - " -0.029308\n", - " -0.216882\n", - " -0.150235\n", - " -0.404206\n", - " -0.999959\n", - " -0.999958\n", - " -0.999959\n", + " -0.533490\n", + " -0.478056\n", + " -0.490888\n", + " -0.385580\n", + " -0.510372\n", + " -0.443400\n", + " 
-0.554384\n", + " -0.999830\n", + " -0.999825\n", + " -0.999825\n", " ...\n", - " -0.352381\n", - " -0.373708\n", - " -0.186207\n", - " -0.117241\n", - " -0.115646\n", - " -0.062069\n", - " -0.232877\n", - " -0.144828\n", - " -0.406897\n", - " -0.170124\n", + " -0.252336\n", + " -0.373635\n", + " -0.525701\n", + " -0.470726\n", + " -0.482436\n", + " -0.378664\n", + " -0.507620\n", + " -0.440936\n", + " -0.553738\n", + " -0.480853\n", " \n", " \n", " 2\n", - " -0.038687\n", - " -0.141852\n", - " -0.039906\n", - " -0.071512\n", - " -0.348183\n", - " -0.208920\n", - " -0.355140\n", - " -0.999911\n", - " -0.999917\n", - " -0.999916\n", + " -0.414414\n", + " -0.359718\n", + " -0.346267\n", + " -0.297806\n", + " -0.447358\n", + " -0.360360\n", + " -0.484866\n", + " -0.999670\n", + " -0.999656\n", + " -0.999656\n", " ...\n", - " -0.396825\n", - " -0.373627\n", - " -0.048276\n", - " -0.144828\n", - " -0.047619\n", - " -0.075862\n", - " -0.356164\n", - " -0.227586\n", - " -0.365517\n", - " -0.165975\n", + " -0.237958\n", + " -0.373286\n", + " -0.408879\n", + " -0.348946\n", + " -0.337237\n", + " -0.294256\n", + " -0.439625\n", + " -0.354386\n", + " -0.483645\n", + " -0.381043\n", " \n", " \n", " 3\n", - " -0.390387\n", - " -0.273154\n", - " -0.255869\n", - " -0.146542\n", - " -0.249707\n", - " -0.164319\n", - " -0.352804\n", - " -0.999881\n", - " -0.999883\n", - " -0.999881\n", + " -0.402664\n", + " -0.396160\n", + " -0.428180\n", + " -0.374608\n", + " -0.499022\n", + " -0.464943\n", + " -0.567663\n", + " -0.999499\n", + " -0.999485\n", + " -0.999494\n", " ...\n", - " -0.333333\n", - " -0.373556\n", - " -0.393103\n", - " -0.282759\n", - " -0.238095\n", - " -0.131034\n", - " -0.260274\n", - " -0.158621\n", - " -0.365517\n", - " -0.259336\n", + " -0.272466\n", + " -0.372968\n", + " -0.397196\n", + " -0.388759\n", + " -0.421546\n", + " -0.371630\n", + " -0.495897\n", + " -0.461988\n", + " -0.565421\n", + " -0.442731\n", " \n", " \n", " 4\n", - " -0.366940\n", - " -0.268464\n", - " -0.258216\n", - " -0.160610\n", - " -0.378664\n", - " -0.417840\n", - " -0.490654\n", - " -0.999849\n", - " -0.999849\n", - " -0.999846\n", + " -0.280846\n", + " -0.264107\n", + " -0.184793\n", + " -0.222962\n", + " -0.336986\n", + " -0.271837\n", + " -0.354814\n", + " -0.999296\n", + " -0.999295\n", + " -0.999301\n", " ...\n", - " -0.390476\n", - " -0.373492\n", - " -0.365517\n", - " -0.282759\n", - " -0.224490\n", - " -0.172414\n", - " -0.397260\n", - " -0.420690\n", - " -0.489655\n", - " -0.331950\n", + " -0.216391\n", + " -0.372599\n", + " -0.282710\n", + " -0.271663\n", + " -0.210773\n", + " -0.237984\n", + " -0.343494\n", + " -0.295906\n", + " -0.413551\n", + " -0.289205\n", " \n", " \n", "\n", @@ -1498,30 +1624,30 @@ ], "text/plain": [ " 0 1 2 3 4 5 6 \\\n", - "0 -0.235639 -0.242673 -0.328638 -0.177022 -0.352872 -0.192488 -0.331776 \n", - "1 -0.181712 -0.092614 -0.133803 -0.029308 -0.216882 -0.150235 -0.404206 \n", - "2 -0.038687 -0.141852 -0.039906 -0.071512 -0.348183 -0.208920 -0.355140 \n", - "3 -0.390387 -0.273154 -0.255869 -0.146542 -0.249707 -0.164319 -0.352804 \n", - "4 -0.366940 -0.268464 -0.258216 -0.160610 -0.378664 -0.417840 -0.490654 \n", + "0 -0.261261 -0.237069 -0.208701 -0.141066 -0.323288 -0.259694 -0.421988 \n", + "1 -0.533490 -0.478056 -0.490888 -0.385580 -0.510372 -0.443400 -0.554384 \n", + "2 -0.414414 -0.359718 -0.346267 -0.297806 -0.447358 -0.360360 -0.484866 \n", + "3 -0.402664 -0.396160 -0.428180 -0.374608 -0.499022 -0.464943 -0.567663 \n", + "4 -0.280846 -0.264107 -0.184793 -0.222962 -0.336986 
-0.271837 -0.354814 \n", "\n", " 7 8 9 ... 16 17 18 19 \\\n", - "0 -1.000000 -1.000000 -1.000000 ... -0.257143 -0.373788 -0.227586 -0.227586 \n", - "1 -0.999959 -0.999958 -0.999959 ... -0.352381 -0.373708 -0.186207 -0.117241 \n", - "2 -0.999911 -0.999917 -0.999916 ... -0.396825 -0.373627 -0.048276 -0.144828 \n", - "3 -0.999881 -0.999883 -0.999881 ... -0.333333 -0.373556 -0.393103 -0.282759 \n", - "4 -0.999849 -0.999849 -0.999846 ... -0.390476 -0.373492 -0.365517 -0.282759 \n", + "0 -1.000000 -1.000000 -1.000000 ... -0.104242 -0.373977 -0.252336 -0.227166 \n", + "1 -0.999830 -0.999825 -0.999825 ... -0.252336 -0.373635 -0.525701 -0.470726 \n", + "2 -0.999670 -0.999656 -0.999656 ... -0.237958 -0.373286 -0.408879 -0.348946 \n", + "3 -0.999499 -0.999485 -0.999494 ... -0.272466 -0.372968 -0.397196 -0.388759 \n", + "4 -0.999296 -0.999295 -0.999301 ... -0.216391 -0.372599 -0.282710 -0.271663 \n", "\n", " 20 21 22 23 24 25 \n", - "0 -0.319728 -0.186207 -0.369863 -0.186207 -0.337931 -0.261411 \n", - "1 -0.115646 -0.062069 -0.232877 -0.144828 -0.406897 -0.170124 \n", - "2 -0.047619 -0.075862 -0.356164 -0.227586 -0.365517 -0.165975 \n", - "3 -0.238095 -0.131034 -0.260274 -0.158621 -0.365517 -0.259336 \n", - "4 -0.224490 -0.172414 -0.397260 -0.420690 -0.489655 -0.331950 \n", + "0 -0.201405 -0.134818 -0.322392 -0.253801 -0.418224 -0.256975 \n", + "1 -0.482436 -0.378664 -0.507620 -0.440936 -0.553738 -0.480853 \n", + "2 -0.337237 -0.294256 -0.439625 -0.354386 -0.483645 -0.381043 \n", + "3 -0.421546 -0.371630 -0.495897 -0.461988 -0.565421 -0.442731 \n", + "4 -0.210773 -0.237984 -0.343494 -0.295906 -0.413551 -0.289205 \n", "\n", "[5 rows x 26 columns]" ] }, - "execution_count": 26, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -1543,17 +1669,17 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 30, "metadata": {}, "outputs": [], "source": [ - "step = 6\n", + "step = 7\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -1562,7 +1688,7 @@ "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" ] }, - "execution_count": 28, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -1573,7 +1699,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 32, "metadata": {}, "outputs": [ { @@ -1623,122 +1749,122 @@ " \n", " \n", " 0\n", - " -0.235639\n", - " -0.242673\n", - " -0.328638\n", - " -0.177022\n", - " -0.352872\n", - " -0.192488\n", - " -0.331776\n", + " -0.261261\n", + " -0.237069\n", + " -0.208701\n", + " -0.141066\n", + " -0.323288\n", + " -0.259694\n", + " -0.421988\n", " -1.000000\n", " -1.000000\n", " -1.000000\n", " ...\n", - " -0.373788\n", - " -0.227586\n", - " -0.227586\n", - " -0.319728\n", - " -0.186207\n", - " -0.369863\n", - " -0.186207\n", - " -0.337931\n", - " -0.261411\n", + " -0.373977\n", + " -0.252336\n", + " -0.227166\n", + " -0.201405\n", + " -0.134818\n", + " -0.322392\n", + " -0.253801\n", + " -0.418224\n", + " -0.256975\n", " T001\n", " \n", " \n", " 1\n", - " -0.181712\n", - " -0.092614\n", - " -0.133803\n", - " -0.029308\n", - " -0.216882\n", - " -0.150235\n", - " -0.404206\n", - " -0.999959\n", - " -0.999958\n", - " -0.999959\n", + " -0.533490\n", + " -0.478056\n", + " -0.490888\n", + " -0.385580\n", + " -0.510372\n", + " -0.443400\n", + " -0.554384\n", + " -0.999830\n", + " -0.999825\n", + " -0.999825\n", " ...\n", - " -0.373708\n", - " 
-0.186207\n", - " -0.117241\n", - " -0.115646\n", - " -0.062069\n", - " -0.232877\n", - " -0.144828\n", - " -0.406897\n", - " -0.170124\n", + " -0.373635\n", + " -0.525701\n", + " -0.470726\n", + " -0.482436\n", + " -0.378664\n", + " -0.507620\n", + " -0.440936\n", + " -0.553738\n", + " -0.480853\n", " T001\n", " \n", " \n", " 2\n", - " -0.038687\n", - " -0.141852\n", - " -0.039906\n", - " -0.071512\n", - " -0.348183\n", - " -0.208920\n", - " -0.355140\n", - " -0.999911\n", - " -0.999917\n", - " -0.999916\n", + " -0.414414\n", + " -0.359718\n", + " -0.346267\n", + " -0.297806\n", + " -0.447358\n", + " -0.360360\n", + " -0.484866\n", + " -0.999670\n", + " -0.999656\n", + " -0.999656\n", " ...\n", - " -0.373627\n", - " -0.048276\n", - " -0.144828\n", - " -0.047619\n", - " -0.075862\n", - " -0.356164\n", - " -0.227586\n", - " -0.365517\n", - " -0.165975\n", + " -0.373286\n", + " -0.408879\n", + " -0.348946\n", + " -0.337237\n", + " -0.294256\n", + " -0.439625\n", + " -0.354386\n", + " -0.483645\n", + " -0.381043\n", " T001\n", " \n", " \n", " 3\n", - " -0.390387\n", - " -0.273154\n", - " -0.255869\n", - " -0.146542\n", - " -0.249707\n", - " -0.164319\n", - " -0.352804\n", - " -0.999881\n", - " -0.999883\n", - " -0.999881\n", + " -0.402664\n", + " -0.396160\n", + " -0.428180\n", + " -0.374608\n", + " -0.499022\n", + " -0.464943\n", + " -0.567663\n", + " -0.999499\n", + " -0.999485\n", + " -0.999494\n", " ...\n", - " -0.373556\n", - " -0.393103\n", - " -0.282759\n", - " -0.238095\n", - " -0.131034\n", - " -0.260274\n", - " -0.158621\n", - " -0.365517\n", - " -0.259336\n", + " -0.372968\n", + " -0.397196\n", + " -0.388759\n", + " -0.421546\n", + " -0.371630\n", + " -0.495897\n", + " -0.461988\n", + " -0.565421\n", + " -0.442731\n", " T001\n", " \n", " \n", " 4\n", - " -0.366940\n", - " -0.268464\n", - " -0.258216\n", - " -0.160610\n", - " -0.378664\n", - " -0.417840\n", - " -0.490654\n", - " -0.999849\n", - " -0.999849\n", - " -0.999846\n", + " -0.280846\n", + " -0.264107\n", + " -0.184793\n", + " -0.222962\n", + " -0.336986\n", + " -0.271837\n", + " -0.354814\n", + " -0.999296\n", + " -0.999295\n", + " -0.999301\n", " ...\n", - " -0.373492\n", - " -0.365517\n", - " -0.282759\n", - " -0.224490\n", - " -0.172414\n", - " -0.397260\n", - " -0.420690\n", - " -0.489655\n", - " -0.331950\n", + " -0.372599\n", + " -0.282710\n", + " -0.271663\n", + " -0.210773\n", + " -0.237984\n", + " -0.343494\n", + " -0.295906\n", + " -0.413551\n", + " -0.289205\n", " T001\n", " \n", " \n", @@ -1748,30 +1874,30 @@ ], "text/plain": [ " 0 1 2 3 4 5 6 \\\n", - "0 -0.235639 -0.242673 -0.328638 -0.177022 -0.352872 -0.192488 -0.331776 \n", - "1 -0.181712 -0.092614 -0.133803 -0.029308 -0.216882 -0.150235 -0.404206 \n", - "2 -0.038687 -0.141852 -0.039906 -0.071512 -0.348183 -0.208920 -0.355140 \n", - "3 -0.390387 -0.273154 -0.255869 -0.146542 -0.249707 -0.164319 -0.352804 \n", - "4 -0.366940 -0.268464 -0.258216 -0.160610 -0.378664 -0.417840 -0.490654 \n", + "0 -0.261261 -0.237069 -0.208701 -0.141066 -0.323288 -0.259694 -0.421988 \n", + "1 -0.533490 -0.478056 -0.490888 -0.385580 -0.510372 -0.443400 -0.554384 \n", + "2 -0.414414 -0.359718 -0.346267 -0.297806 -0.447358 -0.360360 -0.484866 \n", + "3 -0.402664 -0.396160 -0.428180 -0.374608 -0.499022 -0.464943 -0.567663 \n", + "4 -0.280846 -0.264107 -0.184793 -0.222962 -0.336986 -0.271837 -0.354814 \n", "\n", " 7 8 9 ... 17 18 19 20 \\\n", - "0 -1.000000 -1.000000 -1.000000 ... -0.373788 -0.227586 -0.227586 -0.319728 \n", - "1 -0.999959 -0.999958 -0.999959 ... 
-0.373708 -0.186207 -0.117241 -0.115646 \n", - "2 -0.999911 -0.999917 -0.999916 ... -0.373627 -0.048276 -0.144828 -0.047619 \n", - "3 -0.999881 -0.999883 -0.999881 ... -0.373556 -0.393103 -0.282759 -0.238095 \n", - "4 -0.999849 -0.999849 -0.999846 ... -0.373492 -0.365517 -0.282759 -0.224490 \n", + "0 -1.000000 -1.000000 -1.000000 ... -0.373977 -0.252336 -0.227166 -0.201405 \n", + "1 -0.999830 -0.999825 -0.999825 ... -0.373635 -0.525701 -0.470726 -0.482436 \n", + "2 -0.999670 -0.999656 -0.999656 ... -0.373286 -0.408879 -0.348946 -0.337237 \n", + "3 -0.999499 -0.999485 -0.999494 ... -0.372968 -0.397196 -0.388759 -0.421546 \n", + "4 -0.999296 -0.999295 -0.999301 ... -0.372599 -0.282710 -0.271663 -0.210773 \n", "\n", " 21 22 23 24 25 turbine_id \n", - "0 -0.186207 -0.369863 -0.186207 -0.337931 -0.261411 T001 \n", - "1 -0.062069 -0.232877 -0.144828 -0.406897 -0.170124 T001 \n", - "2 -0.075862 -0.356164 -0.227586 -0.365517 -0.165975 T001 \n", - "3 -0.131034 -0.260274 -0.158621 -0.365517 -0.259336 T001 \n", - "4 -0.172414 -0.397260 -0.420690 -0.489655 -0.331950 T001 \n", + "0 -0.134818 -0.322392 -0.253801 -0.418224 -0.256975 T001 \n", + "1 -0.378664 -0.507620 -0.440936 -0.553738 -0.480853 T001 \n", + "2 -0.294256 -0.439625 -0.354386 -0.483645 -0.381043 T001 \n", + "3 -0.371630 -0.495897 -0.461988 -0.565421 -0.442731 T001 \n", + "4 -0.237984 -0.343494 -0.295906 -0.413551 -0.289205 T001 \n", "\n", "[5 rows x 27 columns]" ] }, - "execution_count": 29, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -1793,17 +1919,17 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 33, "metadata": {}, "outputs": [], "source": [ - "step = 7\n", + "step = 8\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 34, "metadata": {}, "outputs": [ { @@ -1812,7 +1938,7 @@ "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" ] }, - "execution_count": 31, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -1823,7 +1949,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 35, "metadata": {}, "outputs": [ { @@ -1873,123 +1999,123 @@ " \n", " \n", " 0\n", - " -0.235639\n", - " -0.242673\n", - " -0.328638\n", - " -0.177022\n", - " -0.352872\n", - " -0.192488\n", - " -0.331776\n", + " -0.261261\n", + " -0.237069\n", + " -0.208701\n", + " -0.141066\n", + " -0.323288\n", + " -0.259694\n", + " -0.421988\n", " -1.000000\n", " -1.000000\n", " -1.000000\n", " ...\n", - " -0.227586\n", - " -0.227586\n", - " -0.319728\n", - " -0.186207\n", - " -0.369863\n", - " -0.186207\n", - " -0.337931\n", - " -0.261411\n", + " -0.252336\n", + " -0.227166\n", + " -0.201405\n", + " -0.134818\n", + " -0.322392\n", + " -0.253801\n", + " -0.418224\n", + " -0.256975\n", " T001\n", " 2013-01-10 00:00:00\n", " \n", " \n", " 1\n", - " -0.181712\n", - " -0.092614\n", - " -0.133803\n", - " -0.029308\n", - " -0.216882\n", - " -0.150235\n", - " -0.404206\n", - " -0.999959\n", - " -0.999958\n", - " -0.999959\n", + " -0.533490\n", + " -0.478056\n", + " -0.490888\n", + " -0.385580\n", + " -0.510372\n", + " -0.443400\n", + " -0.554384\n", + " -0.999830\n", + " -0.999825\n", + " -0.999825\n", " ...\n", - " -0.186207\n", - " -0.117241\n", - " -0.115646\n", - " -0.062069\n", - " -0.232877\n", - " -0.144828\n", - " -0.406897\n", - " -0.170124\n", + " -0.525701\n", + " -0.470726\n", + " -0.482436\n", + " -0.378664\n", + " -0.507620\n", + " -0.440936\n", + " 
-0.553738\n", + " -0.480853\n", " T001\n", - " 2013-01-10 00:10:00\n", + " 2013-01-10 01:00:00\n", " \n", " \n", " 2\n", - " -0.038687\n", - " -0.141852\n", - " -0.039906\n", - " -0.071512\n", - " -0.348183\n", - " -0.208920\n", - " -0.355140\n", - " -0.999911\n", - " -0.999917\n", - " -0.999916\n", + " -0.414414\n", + " -0.359718\n", + " -0.346267\n", + " -0.297806\n", + " -0.447358\n", + " -0.360360\n", + " -0.484866\n", + " -0.999670\n", + " -0.999656\n", + " -0.999656\n", " ...\n", - " -0.048276\n", - " -0.144828\n", - " -0.047619\n", - " -0.075862\n", - " -0.356164\n", - " -0.227586\n", - " -0.365517\n", - " -0.165975\n", + " -0.408879\n", + " -0.348946\n", + " -0.337237\n", + " -0.294256\n", + " -0.439625\n", + " -0.354386\n", + " -0.483645\n", + " -0.381043\n", " T001\n", - " 2013-01-10 00:20:00\n", + " 2013-01-10 02:00:00\n", " \n", " \n", " 3\n", - " -0.390387\n", - " -0.273154\n", - " -0.255869\n", - " -0.146542\n", - " -0.249707\n", - " -0.164319\n", - " -0.352804\n", - " -0.999881\n", - " -0.999883\n", - " -0.999881\n", + " -0.402664\n", + " -0.396160\n", + " -0.428180\n", + " -0.374608\n", + " -0.499022\n", + " -0.464943\n", + " -0.567663\n", + " -0.999499\n", + " -0.999485\n", + " -0.999494\n", " ...\n", - " -0.393103\n", - " -0.282759\n", - " -0.238095\n", - " -0.131034\n", - " -0.260274\n", - " -0.158621\n", - " -0.365517\n", - " -0.259336\n", + " -0.397196\n", + " -0.388759\n", + " -0.421546\n", + " -0.371630\n", + " -0.495897\n", + " -0.461988\n", + " -0.565421\n", + " -0.442731\n", " T001\n", - " 2013-01-10 00:30:00\n", + " 2013-01-10 03:00:00\n", " \n", " \n", " 4\n", - " -0.366940\n", - " -0.268464\n", - " -0.258216\n", - " -0.160610\n", - " -0.378664\n", - " -0.417840\n", - " -0.490654\n", - " -0.999849\n", - " -0.999849\n", - " -0.999846\n", + " -0.280846\n", + " -0.264107\n", + " -0.184793\n", + " -0.222962\n", + " -0.336986\n", + " -0.271837\n", + " -0.354814\n", + " -0.999296\n", + " -0.999295\n", + " -0.999301\n", " ...\n", - " -0.365517\n", - " -0.282759\n", - " -0.224490\n", - " -0.172414\n", - " -0.397260\n", - " -0.420690\n", - " -0.489655\n", - " -0.331950\n", + " -0.282710\n", + " -0.271663\n", + " -0.210773\n", + " -0.237984\n", + " -0.343494\n", + " -0.295906\n", + " -0.413551\n", + " -0.289205\n", " T001\n", - " 2013-01-10 00:40:00\n", + " 2013-01-10 04:00:00\n", " \n", " \n", "\n", @@ -1998,30 +2124,30 @@ ], "text/plain": [ " 0 1 2 3 4 5 6 \\\n", - "0 -0.235639 -0.242673 -0.328638 -0.177022 -0.352872 -0.192488 -0.331776 \n", - "1 -0.181712 -0.092614 -0.133803 -0.029308 -0.216882 -0.150235 -0.404206 \n", - "2 -0.038687 -0.141852 -0.039906 -0.071512 -0.348183 -0.208920 -0.355140 \n", - "3 -0.390387 -0.273154 -0.255869 -0.146542 -0.249707 -0.164319 -0.352804 \n", - "4 -0.366940 -0.268464 -0.258216 -0.160610 -0.378664 -0.417840 -0.490654 \n", + "0 -0.261261 -0.237069 -0.208701 -0.141066 -0.323288 -0.259694 -0.421988 \n", + "1 -0.533490 -0.478056 -0.490888 -0.385580 -0.510372 -0.443400 -0.554384 \n", + "2 -0.414414 -0.359718 -0.346267 -0.297806 -0.447358 -0.360360 -0.484866 \n", + "3 -0.402664 -0.396160 -0.428180 -0.374608 -0.499022 -0.464943 -0.567663 \n", + "4 -0.280846 -0.264107 -0.184793 -0.222962 -0.336986 -0.271837 -0.354814 \n", "\n", " 7 8 9 ... 18 19 20 21 \\\n", - "0 -1.000000 -1.000000 -1.000000 ... -0.227586 -0.227586 -0.319728 -0.186207 \n", - "1 -0.999959 -0.999958 -0.999959 ... -0.186207 -0.117241 -0.115646 -0.062069 \n", - "2 -0.999911 -0.999917 -0.999916 ... -0.048276 -0.144828 -0.047619 -0.075862 \n", - "3 -0.999881 -0.999883 -0.999881 ... 
-0.393103 -0.282759 -0.238095 -0.131034 \n", - "4 -0.999849 -0.999849 -0.999846 ... -0.365517 -0.282759 -0.224490 -0.172414 \n", + "0 -1.000000 -1.000000 -1.000000 ... -0.252336 -0.227166 -0.201405 -0.134818 \n", + "1 -0.999830 -0.999825 -0.999825 ... -0.525701 -0.470726 -0.482436 -0.378664 \n", + "2 -0.999670 -0.999656 -0.999656 ... -0.408879 -0.348946 -0.337237 -0.294256 \n", + "3 -0.999499 -0.999485 -0.999494 ... -0.397196 -0.388759 -0.421546 -0.371630 \n", + "4 -0.999296 -0.999295 -0.999301 ... -0.282710 -0.271663 -0.210773 -0.237984 \n", "\n", " 22 23 24 25 turbine_id timestamp \n", - "0 -0.369863 -0.186207 -0.337931 -0.261411 T001 2013-01-10 00:00:00 \n", - "1 -0.232877 -0.144828 -0.406897 -0.170124 T001 2013-01-10 00:10:00 \n", - "2 -0.356164 -0.227586 -0.365517 -0.165975 T001 2013-01-10 00:20:00 \n", - "3 -0.260274 -0.158621 -0.365517 -0.259336 T001 2013-01-10 00:30:00 \n", - "4 -0.397260 -0.420690 -0.489655 -0.331950 T001 2013-01-10 00:40:00 \n", + "0 -0.322392 -0.253801 -0.418224 -0.256975 T001 2013-01-10 00:00:00 \n", + "1 -0.507620 -0.440936 -0.553738 -0.480853 T001 2013-01-10 01:00:00 \n", + "2 -0.439625 -0.354386 -0.483645 -0.381043 T001 2013-01-10 02:00:00 \n", + "3 -0.495897 -0.461988 -0.565421 -0.442731 T001 2013-01-10 03:00:00 \n", + "4 -0.343494 -0.295906 -0.413551 -0.289205 T001 2013-01-10 04:00:00 \n", "\n", "[5 rows x 28 columns]" ] }, - "execution_count": 32, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -2044,7 +2170,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 36, "metadata": {}, "outputs": [ { @@ -2053,29 +2179,29 @@ "{'window_size': 24, 'cutoff_time': 'cutoff_time', 'time_index': 'timestamp'}" ] }, - "execution_count": 33, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pipeline._pipeline.get_hyperparameters()[\n", - " 'mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1']" + " 'mlstars.custom.timeseries_preprocessing.cutoff_window_sequences#1']" ] }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 37, "metadata": {}, "outputs": [], "source": [ - "step = 8\n", + "step = 9\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 38, "metadata": {}, "outputs": [ { @@ -2084,7 +2210,7 @@ "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" ] }, - "execution_count": 35, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -2095,16 +2221,16 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(51121, 28)" + "(8521, 28)" ] }, - "execution_count": 36, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } @@ -2115,7 +2241,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 40, "metadata": {}, "outputs": [ { @@ -2124,7 +2250,7 @@ "(353,)" ] }, - "execution_count": 37, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } @@ -2135,7 +2261,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 41, "metadata": {}, "outputs": [ { @@ -2144,7 +2270,7 @@ "(353, 24, 26)" ] }, - "execution_count": 38, + "execution_count": 41, "metadata": {}, "output_type": "execute_result" } @@ -2155,7 +2281,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 42, "metadata": { "scrolled": true }, @@ -2163,153 +2289,153 @@ { 
"data": { "text/plain": [ - "array([[-0.66002345, -0.57327081, -0.64084507, -0.57796014, -0.6014068 ,\n", - " -0.56103286, -0.55140187, -0.9928135 , -0.99291267, -0.99315058,\n", - " -0.99304288, -0.99346346, -0.99352632, -0.99395333, -0.42553191,\n", - " -0.41772152, -0.58730159, -0.35996294, -0.66896552, -0.57241379,\n", - " -0.61904762, -0.5862069 , -0.60273973, -0.55862069, -0.55862069,\n", - " -0.59751037],\n", - " [-0.2989449 , -0.38569754, -0.48591549, -0.47713951, -0.66705744,\n", - " -0.5915493 , -0.77336449, -0.99278389, -0.9928852 , -0.99312701,\n", - " -0.99301988, -0.9934481 , -0.9935075 , -0.9939459 , -0.39361702,\n", - " -0.40506329, -0.54285714, -0.35992014, -0.40689655, -0.42068966,\n", - " -0.46938776, -0.48965517, -0.67123288, -0.5862069 , -0.83448276,\n", - " -0.5560166 ],\n", - " [-0.33645955, -0.40679953, -0.39906103, -0.38569754, -0.56154748,\n", - " -0.43192488, -0.45560748, -0.99275498, -0.9928584 , -0.99310017,\n", - " -0.99299431, -0.99342739, -0.99348349, -0.99392294, -0.29787234,\n", - " -0.3164557 , -0.49206349, -0.35986854, -0.42068966, -0.43448276,\n", - " -0.40136054, -0.43448276, -0.56164384, -0.47586207, -0.51724138,\n", - " -0.46473029],\n", - " [-0.36928488, -0.41148886, -0.51173709, -0.47010551, -0.54982415,\n", - " -0.48122066, -0.51869159, -0.99272467, -0.9928316 , -0.99307791,\n", - " -0.99297067, -0.99340669, -0.99346079, -0.99390066, -0.29787234,\n", - " -0.35443038, -0.49206349, -0.35981854, -0.39310345, -0.43448276,\n", - " -0.49659864, -0.47586207, -0.56164384, -0.50344828, -0.53103448,\n", - " -0.4813278 ],\n", - " [-0.20750293, -0.35287222, -0.37089202, -0.2989449 , -0.32473623,\n", - " -0.37793427, -0.45794393, -0.99269435, -0.99280347, -0.99305173,\n", - " -0.99294447, -0.9933793 , -0.99343419, -0.9938777 , -0.32978723,\n", - " -0.39240506, -0.49206349, -0.35976314, -0.39310345, -0.40689655,\n", - " -0.41496599, -0.42068966, -0.42465753, -0.42068966, -0.51724138,\n", - " -0.4253112 ],\n", - " [-0.55685815, -0.60375147, -0.64084507, -0.54513482, -0.55685815,\n", - " -0.58215962, -0.63785047, -0.99267179, -0.99278404, -0.99303471,\n", - " -0.99292338, -0.99335793, -0.99341472, -0.99386014, -0.29787234,\n", - " -0.34177215, -0.51746032, -0.35972353, -0.54482759, -0.5862069 ,\n", - " -0.60544218, -0.53103448, -0.54794521, -0.57241379, -0.62758621,\n", - " -0.58921162],\n", - " [-0.66705744, -0.67643611, -0.69014085, -0.64361079, -0.74443142,\n", - " -0.7370892 , -0.7546729 , -0.99265487, -0.99276863, -0.99302096,\n", - " -0.9929074 , -0.99334657, -0.9934024 , -0.99384934, -0.39361702,\n", - " -0.48101266, -0.51746032, -0.35969533, -0.65517241, -0.66896552,\n", - " -0.67346939, -0.64137931, -0.75342466, -0.72413793, -0.76551724,\n", - " -0.70746888],\n", - " [-0.53341149, -0.60375147, -0.63849765, -0.61547479, -0.71395076,\n", - " -0.70187793, -0.72897196, -0.99263231, -0.99275054, -0.99300394,\n", - " -0.99289014, -0.99333255, -0.99338877, -0.9938365 , -0.38297872,\n", - " -0.37974684, -0.54920635, -0.35966173, -0.54482759, -0.6137931 ,\n", - " -0.60544218, -0.6137931 , -0.69863014, -0.69655172, -0.72413793,\n", - " -0.65145228],\n", - " [-0.44196952, -0.4021102 , -0.49295775, -0.49355217, -0.62719812,\n", - " -0.62676056, -0.71728972, -0.99260481, -0.99272173, -0.99298103,\n", - " -0.99286777, -0.99331518, -0.9933719 , -0.99382367, -0.38297872,\n", - " -0.4556962 , -0.54285714, -0.35961793, -0.44827586, -0.39310345,\n", - " -0.4829932 , -0.50344828, -0.63013699, -0.62758621, -0.72413793,\n", - " -0.54564315],\n", - " [-0.46307151, -0.38100821, 
-0.35446009, -0.44900352, -0.50293083,\n", - " -0.4741784 , -0.63317757, -0.99257731, -0.99269226, -0.99295157,\n", - " -0.99284285, -0.99329247, -0.9933479 , -0.99380612, -0.28723404,\n", - " -0.3164557 , -0.47301587, -0.35956633, -0.44827586, -0.37931034,\n", - " -0.34693878, -0.44827586, -0.52054795, -0.47586207, -0.62758621,\n", - " -0.46473029],\n", - " [-0.26611958, -0.26611958, -0.29107981, -0.34349355, -0.3950762 ,\n", - " -0.29577465, -0.43925234, -0.9925477 , -0.99266278, -0.99292211,\n", - " -0.99281601, -0.99326575, -0.99331805, -0.99378316, -0.28723404,\n", - " -0.39240506, -0.46031746, -0.35950873, -0.40689655, -0.37931034,\n", - " -0.34693878, -0.40689655, -0.43835616, -0.35172414, -0.51724138,\n", - " -0.40248963],\n", - " [-0.46307151, -0.35990621, -0.43192488, -0.36928488, -0.47245018,\n", - " -0.44600939, -0.41121495, -0.99252091, -0.9926333 , -0.99289592,\n", - " -0.99278789, -0.99324104, -0.99329275, -0.99375547, -0.28723404,\n", - " -0.43037975, -0.46666667, -0.35945292, -0.46206897, -0.37931034,\n", - " -0.41496599, -0.37931034, -0.47945205, -0.44827586, -0.42068966,\n", - " -0.42116183],\n", - " [-0.44431419, -0.4021102 , -0.38732394, -0.3059789 , -0.35990621,\n", - " -0.28403756, -0.40420561, -0.99249341, -0.99260583, -0.99287039,\n", - " -0.99275913, -0.99321298, -0.99326226, -0.99372846, -0.30851064,\n", - " -0.4556962 , -0.46031746, -0.35939572, -0.44827586, -0.42068966,\n", - " -0.42857143, -0.36551724, -0.4109589 , -0.33793103, -0.43448276,\n", - " -0.406639 ],\n", - " [-0.43962485, -0.36459555, -0.35211268, -0.35052755, -0.44665885,\n", - " -0.34741784, -0.44859813, -0.99246592, -0.99257703, -0.99284028,\n", - " -0.99273037, -0.99318693, -0.99323176, -0.99370279, -0.28723404,\n", - " -0.36708861, -0.48571429, -0.35933712, -0.44827586, -0.39310345,\n", - " -0.33333333, -0.36551724, -0.45205479, -0.33793103, -0.46206897,\n", - " -0.39211618],\n", - " [-0.2028136 , -0.25439625, -0.30751174, -0.3130129 , -0.37631887,\n", - " -0.3685446 , -0.46495327, -0.99243067, -0.99254152, -0.9928082 ,\n", - " -0.99269906, -0.99315821, -0.99320322, -0.99367781, -0.27659574,\n", - " -0.32911392, -0.47301587, -0.35927332, -0.29655172, -0.25517241,\n", - " -0.29251701, -0.31034483, -0.39726027, -0.37931034, -0.47586207,\n", - " -0.33817427],\n", - " [-0.23329426, -0.27080891, -0.31924883, -0.24736225, -0.35521688,\n", - " -0.33098592, -0.4182243 , -0.99239753, -0.99250668, -0.99277743,\n", - " -0.99266518, -0.99312815, -0.99317272, -0.99365012, -0.26595745,\n", - " -0.40506329, -0.46666667, -0.35920811, -0.33793103, -0.26896552,\n", - " -0.31972789, -0.25517241, -0.36986301, -0.33793103, -0.42068966,\n", - " -0.32365145],\n", - " [-0.12778429, -0.11137163, -0.10798122, -0.05275498, -0.25439625,\n", - " -0.23474178, -0.28271028, -0.99236228, -0.99247117, -0.99274143,\n", - " -0.99263131, -0.99309876, -0.99314028, -0.99362108, -0.24468085,\n", - " -0.32911392, -0.43492063, -0.35914011, -0.29655172, -0.25517241,\n", - " -0.21088435, -0.25517241, -0.38356164, -0.29655172, -0.39310345,\n", - " -0.29460581],\n", - " [-0.14185229, -0.2028136 , -0.31690141, -0.17467761, -0.24970692,\n", - " -0.25117371, -0.37383178, -0.9923242 , -0.99243567, -0.99271066,\n", - " -0.9925968 , -0.9930667 , -0.99310849, -0.99359204, -0.22340426,\n", - " -0.3164557 , -0.41587302, -0.35907171, -0.24137931, -0.25517241,\n", - " -0.31972789, -0.24137931, -0.32876712, -0.31034483, -0.39310345,\n", - " -0.29045643],\n", - " [-0.4021102 , -0.32708089, -0.33802817, -0.28018757, -0.3950762 ,\n", - " -0.40140845, 
-0.48364486, -0.99229459, -0.99240284, -0.99268055,\n", - " -0.99256421, -0.99303731, -0.99308059, -0.99356773, -0.25531915,\n", - " -0.29113924, -0.40952381, -0.35901131, -0.40689655, -0.31034483,\n", - " -0.33333333, -0.28275862, -0.38356164, -0.39310345, -0.48965517,\n", - " -0.37344398],\n", - " [-0.27549824, -0.3059789 , -0.37089202, -0.20046893, -0.34818288,\n", - " -0.33802817, -0.42056075, -0.99225863, -0.99237068, -0.99265109,\n", - " -0.99252778, -0.99300725, -0.99305075, -0.99354072, -0.28723404,\n", - " -0.41772152, -0.48571429, -0.3589459 , -0.28275862, -0.32413793,\n", - " -0.34693878, -0.2 , -0.36986301, -0.35172414, -0.43448276,\n", - " -0.32157676],\n", - " [-0.30832356, -0.3059789 , -0.3286385 , -0.31066823, -0.32473623,\n", - " -0.34741784, -0.38785047, -0.99222479, -0.99233786, -0.99262032,\n", - " -0.9924971 , -0.99297519, -0.9930209 , -0.99351168, -0.28723404,\n", - " -0.3164557 , -0.47936508, -0.3588813 , -0.32413793, -0.31034483,\n", - " -0.31972789, -0.32413793, -0.32876712, -0.35172414, -0.39310345,\n", - " -0.32987552],\n", - " [-0.33645955, -0.2098476 , -0.24413146, -0.2919109 , -0.41383353,\n", - " -0.41079812, -0.46495327, -0.99219025, -0.99230168, -0.99258563,\n", - " -0.99246579, -0.99294781, -0.99299365, -0.9934867 , -0.24468085,\n", - " -0.29113924, -0.42857143, -0.3588177 , -0.31034483, -0.24137931,\n", - " -0.23809524, -0.31034483, -0.42465753, -0.40689655, -0.47586207,\n", - " -0.34024896],\n", - " [-0.24267292, -0.15357562, -0.19248826, -0.13950762, -0.35052755,\n", - " -0.30046948, -0.37616822, -0.99215358, -0.99226215, -0.99254831,\n", - " -0.99242872, -0.99291708, -0.99296121, -0.99345766, -0.22340426,\n", - " -0.25316456, -0.42857143, -0.3587457 , -0.26896552, -0.17241379,\n", - " -0.18367347, -0.1862069 , -0.35616438, -0.29655172, -0.39310345,\n", - " -0.25311203],\n", - " [-0.2989449 , -0.26377491, -0.27699531, -0.15592028, -0.34583822,\n", - " -0.34976526, -0.48831776, -0.99211763, -0.99222731, -0.99251493,\n", - " -0.99239038, -0.99288636, -0.99293072, -0.99343267, -0.20212766,\n", - " -0.24050633, -0.3968254 , -0.35867929, -0.28275862, -0.26896552,\n", - " -0.26530612, -0.15862069, -0.35616438, -0.33793103, -0.47586207,\n", - " -0.31120332]])" + "array([[-0.58793576, -0.60305643, -0.63981971, -0.61481191, -0.69823875,\n", + " -0.65021543, -0.68912322, -0.99436914, -0.99439755, -0.99454249,\n", + " -0.99446788, -0.99476185, -0.99490997, -0.99529511, -0.34701493,\n", + " -0.33886256, -0.33860532, -0.36301186, -0.57943925, -0.59250585,\n", + " -0.6323185 , -0.60609613, -0.69284877, -0.64444444, -0.68691589,\n", + " -0.63853752],\n", + " [-0.56600078, -0.5846395 , -0.63002156, -0.61559561, -0.70880626,\n", + " -0.66392479, -0.69732474, -0.9942427 , -0.99427986, -0.9944408 ,\n", + " -0.99436498, -0.99468147, -0.99482011, -0.99521249, -0.33955224,\n", + " -0.31516588, -0.38892883, -0.36280656, -0.55841121, -0.57611241,\n", + " -0.62295082, -0.61078546, -0.70222743, -0.65847953, -0.69392523,\n", + " -0.63645815],\n", + " [-0.64081473, -0.64184953, -0.67038997, -0.63597179, -0.71350294,\n", + " -0.65844105, -0.66764304, -0.99412236, -0.99416864, -0.99434228,\n", + " -0.99426059, -0.99459663, -0.99472365, -0.99511795, -0.34328358,\n", + " -0.30094787, -0.36304817, -0.36259859, -0.63317757, -0.6323185 ,\n", + " -0.66042155, -0.62954279, -0.70926143, -0.65380117, -0.66588785,\n", + " -0.66002426],\n", + " [-0.73678026, -0.72139498, -0.72800314, -0.69239812, -0.71350294,\n", + " -0.68233451, -0.69732474, -0.99403811, -0.99408512, -0.9942623 ,\n", + " 
-0.99417111, -0.99451525, -0.99463206, -0.9950315 , -0.40671642,\n", + " -0.36018957, -0.44644141, -0.36242395, -0.72897196, -0.71194379,\n", + " -0.71896956, -0.68347011, -0.70926143, -0.6748538 , -0.69392523,\n", + " -0.71027552],\n", + " [-0.75401488, -0.74333856, -0.75112679, -0.71590909, -0.76555773,\n", + " -0.73599687, -0.75278266, -0.99395808, -0.99400684, -0.99419094,\n", + " -0.99409367, -0.99444556, -0.99455517, -0.99495418, -0.43656716,\n", + " -0.3957346 , -0.465133 , -0.36226933, -0.7453271 , -0.735363 ,\n", + " -0.74004684, -0.70926143, -0.76084408, -0.73099415, -0.75 ,\n", + " -0.7463178 ],\n", + " [-0.79866823, -0.76684953, -0.7558299 , -0.72688088, -0.76125245,\n", + " -0.75714845, -0.78363601, -0.99389098, -0.99393583, -0.99411958,\n", + " -0.99401538, -0.99437709, -0.99448423, -0.99489036, -0.43843284,\n", + " -0.37914692, -0.49388929, -0.36212623, -0.78971963, -0.75644028,\n", + " -0.7470726 , -0.72098476, -0.75615475, -0.7497076 , -0.78037383,\n", + " -0.76572518],\n", + " [-0.84919702, -0.83855799, -0.82245738, -0.78134796, -0.75225049,\n", + " -0.70661966, -0.65787932, -0.99384186, -0.99388279, -0.9940635 ,\n", + " -0.99395157, -0.9943113 , -0.99441264, -0.99481202, -0.51679104,\n", + " -0.50473934, -0.53414809, -0.36199904, -0.8411215 , -0.83138173,\n", + " -0.81264637, -0.77256741, -0.74677608, -0.70292398, -0.65654206,\n", + " -0.77438919],\n", + " [-0.69134352, -0.705721 , -0.73584166, -0.70297806, -0.75225049,\n", + " -0.72659616, -0.71724273, -0.99377229, -0.99381646, -0.99400032,\n", + " -0.99387925, -0.99423682, -0.99433003, -0.99471624, -0.43843284,\n", + " -0.40521327, -0.48094896, -0.36184615, -0.68457944, -0.69555035,\n", + " -0.72599532, -0.6975381 , -0.74677608, -0.71929825, -0.71261682,\n", + " -0.71893953],\n", + " [-0.84488837, -0.82915361, -0.83578287, -0.81896552, -0.86105675,\n", + " -0.8613396 , -0.86330795, -0.99369779, -0.99374656, -0.99393715,\n", + " -0.99381182, -0.99418494, -0.99427639, -0.99466379, -0.49253731,\n", + " -0.48104265, -0.51545651, -0.36172116, -0.8364486 , -0.81967213,\n", + " -0.82435597, -0.81008206, -0.85463072, -0.85497076, -0.86214953,\n", + " -0.84889967],\n", + " [-0.77908343, -0.78761755, -0.78757594, -0.78918495, -0.82348337,\n", + " -0.82491187, -0.85276313, -0.99365725, -0.99370625, -0.99389819,\n", + " -0.99377113, -0.99415254, -0.99424222, -0.99463329, -0.52798507,\n", + " -0.6042654 , -0.51545651, -0.36164779, -0.77336449, -0.77985948,\n", + " -0.78220141, -0.78429074, -0.86635404, -0.82222222, -0.85046729,\n", + " -0.81562987],\n", + " [-0.70544458, -0.64733542, -0.64844209, -0.61833856, -0.6481409 ,\n", + " -0.66392479, -0.71646163, -0.99356747, -0.99360832, -0.99380327,\n", + " -0.99367558, -0.99407272, -0.99415647, -0.99456035, -0.36567164,\n", + " -0.4549763 , -0.34291876, -0.36146698, -0.70560748, -0.63934426,\n", + " -0.63934426, -0.62016413, -0.64830012, -0.65847953, -0.72663551,\n", + " -0.66868827],\n", + " [-0.70387779, -0.67202194, -0.69508132, -0.72413793, -0.73228963,\n", + " -0.72816295, -0.72310096, -0.99348204, -0.99351955, -0.99372023,\n", + " -0.99359367, -0.99399256, -0.99407882, -0.99449203, -0.38432836,\n", + " -0.58530806, -0.33141625, -0.36130226, -0.69392523, -0.66042155,\n", + " -0.68384075, -0.71629543, -0.72801876, -0.72163743, -0.72196262,\n", + " -0.7113152 ],\n", + " [-0.8515472 , -0.81073668, -0.776602 , -0.76724138, -0.78277886,\n", + " -0.75832354, -0.74262839, -0.99341682, -0.99344607, -0.99364669,\n", + " -0.99352762, -0.99392743, -0.99401037, -0.99441763, -0.44029851,\n", + 
" -0.5521327 , -0.38461538, -0.36116102, -0.84345794, -0.80327869,\n", + " -0.76814988, -0.76084408, -0.77725674, -0.75204678, -0.73831776,\n", + " -0.7865188 ],\n", + " [-0.80258519, -0.83659875, -0.83499902, -0.79741379, -0.80821918,\n", + " -0.81629456, -0.79379028, -0.99336347, -0.99339091, -0.99358745,\n", + " -0.99346147, -0.9938642 , -0.99394733, -0.99434605, -0.44962687,\n", + " -0.6563981 , -0.34579439, -0.36103606, -0.79439252, -0.82669789,\n", + " -0.82669789, -0.78898007, -0.80304807, -0.81052632, -0.79205607,\n", + " -0.81632299],\n", + " [-0.83313749, -0.87539185, -0.90241035, -0.88440439, -0.86771037,\n", + " -0.87935762, -0.87580551, -0.99331764, -0.99335898, -0.99355602,\n", + " -0.99342259, -0.99382267, -0.99390959, -0.99430418, -0.54291045,\n", + " -0.72274882, -0.42918763, -0.36096002, -0.82943925, -0.87119438,\n", + " -0.89461358, -0.87573271, -0.86166471, -0.87134503, -0.87383178,\n", + " -0.88078323],\n", + " [-0.56678418, -0.60031348, -0.64295512, -0.78409091, -0.76164384,\n", + " -0.78535057, -0.82464362, -0.99321481, -0.99327557, -0.99349034,\n", + " -0.99337881, -0.9937915 , -0.99387347, -0.99427367, -0.32835821,\n", + " -0.47630332, -0.25808771, -0.36084678, -0.56074766, -0.59250585,\n", + " -0.6323185 , -0.77960141, -0.84759672, -0.78947368, -0.8364486 ,\n", + " -0.72621729],\n", + " [-0.77007442, -0.81230408, -0.83186361, -0.85540752, -0.85870841,\n", + " -0.86486486, -0.847686 , -0.99311634, -0.99319338, -0.99341516,\n", + " -0.99332651, -0.99374196, -0.99381551, -0.99422246, -0.46641791,\n", + " -0.65165877, -0.39324227, -0.36071245, -0.76168224, -0.80093677,\n", + " -0.82201405, -0.84759672, -0.85463072, -0.85730994, -0.84579439,\n", + " -0.83780974],\n", + " [-0.87622405, -0.92163009, -0.91377621, -0.89224138, -0.84540117,\n", + " -0.83431257, -0.82112869, -0.99306816, -0.99315821, -0.99338734,\n", + " -0.99329935, -0.99370611, -0.99377885, -0.9941789 , -0.55783582,\n", + " -0.65402844, -0.50970525, -0.36064058, -0.86682243, -0.91334895,\n", + " -0.90632319, -0.88745604, -0.84056272, -0.82923977, -0.81775701,\n", + " -0.87731762],\n", + " [-0.82843713, -0.83111285, -0.84166177, -0.8322884 , -0.84579256,\n", + " -0.8515472 , -0.86057411, -0.99302656, -0.99312426, -0.99335155,\n", + " -0.99325919, -0.99365991, -0.99373278, -0.99413129, -0.50559701,\n", + " -0.53791469, -0.52120776, -0.36055736, -0.82242991, -0.82201405,\n", + " -0.83138173, -0.82415006, -0.84056272, -0.84327485, -0.85747664,\n", + " -0.84508751],\n", + " [-0.74539757, -0.73824451, -0.76484421, -0.72100313, -0.73228963,\n", + " -0.70975323, -0.739504 , -0.99296569, -0.99306553, -0.99329699,\n", + " -0.9932005 , -0.99360224, -0.99367493, -0.99407862, -0.45149254,\n", + " -0.46208531, -0.48382459, -0.36044105, -0.73598131, -0.73067916,\n", + " -0.75644028, -0.71629543, -0.72801876, -0.70526316, -0.73831776,\n", + " -0.73696067],\n", + " [-0.40814728, -0.4596395 , -0.51087596, -0.46316614, -0.54598826,\n", + " -0.50607129, -0.57039641, -0.99283748, -0.99294147, -0.9931881 ,\n", + " -0.99308418, -0.99349681, -0.99356041, -0.99398047, -0.30597015,\n", + " -0.29383886, -0.34867002, -0.36020709, -0.46728972, -0.470726 ,\n", + " -0.5175644 , -0.48651817, -0.55685815, -0.51812865, -0.59579439,\n", + " -0.5179345 ],\n", + " [-0.47591069, -0.45219436, -0.48579267, -0.48981191, -0.57847358,\n", + " -0.54876616, -0.61882445, -0.99268659, -0.99280044, -0.99306033,\n", + " -0.99295359, -0.99338192, -0.99344287, -0.9938794 , -0.30223881,\n", + " -0.33649289, -0.32278936, -0.35994787, -0.49065421, 
-0.46370023,\n", + " -0.4941452 , -0.49589683, -0.58264947, -0.55321637, -0.62850467,\n", + " -0.53110379],\n", + " [-0.26792009, -0.27115987, -0.30080345, -0.24412226, -0.34246575,\n", + " -0.30434783, -0.40285101, -0.99250927, -0.99261854, -0.99288914,\n", + " -0.99278188, -0.99322495, -0.99327569, -0.9937324 , -0.22947761,\n", + " -0.28909953, -0.26096334, -0.35960139, -0.33878505, -0.29976581,\n", + " -0.32786885, -0.2919109 , -0.38100821, -0.32865497, -0.42523364,\n", + " -0.3394559 ],\n", + " [-0.31374853, -0.26449843, -0.2941407 , -0.23315047, -0.36516634,\n", + " -0.35957697, -0.44112478, -0.9923035 , -0.99241264, -0.99269787,\n", + " -0.99258055, -0.99304482, -0.99309553, -0.99356987, -0.2108209 ,\n", + " -0.21563981, -0.23652049, -0.35921021, -0.30607477, -0.26229508,\n", + " -0.29039813, -0.23563892, -0.35990621, -0.35204678, -0.43925234,\n", + " -0.32004852]])" ] }, - "execution_count": 39, + "execution_count": 42, "metadata": {}, "output_type": "execute_result" } @@ -2331,22 +2457,21 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 43, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "2022-01-18 05:32:48.464559: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2022-01-18 05:32:48.495873: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fba31d9b0c0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n", - "2022-01-18 05:32:48.495892: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version\n" + "2023-04-13 18:15:22.843587: I tensorflow/core/platform/cpu_feature_guard.cc:143] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA\n", + "2023-04-13 18:15:22.880000: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f80a9a82ef0 initialized for platform Host (this does not guarantee that XLA will be used). 
Devices:\n", + "2023-04-13 18:15:22.880015: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version\n" ] } ], "source": [ - "step = 9\n", + "step = 10\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] } @@ -2367,7 +2492,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.11" + "version": "3.8.16" } }, "nbformat": 4, diff --git a/tutorials/pipelines/lstm_regressor_with_unstack.ipynb b/tutorials/pipelines/lstm_regressor_with_unstack.ipynb index 516c6da..41a5af8 100644 --- a/tutorials/pipelines/lstm_regressor_with_unstack.ipynb +++ b/tutorials/pipelines/lstm_regressor_with_unstack.ipynb @@ -17,12 +17,12 @@ "source": [ "from draco.demo import load_demo\n", "\n", - "train_target_times, test_target_times, readings = load_demo()" + "target_times, readings = load_demo()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "80315927", "metadata": {}, "outputs": [], @@ -32,7 +32,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 3, "id": "1073a88a", "metadata": {}, "outputs": [], @@ -44,10 +44,31 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "id": "1c6cb15d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['pandas.DataFrame.resample',\n", + " 'pandas.DataFrame.unstack',\n", + " 'pandas.DataFrame.pop',\n", + " 'pandas.DataFrame.pop',\n", + " 'sklearn.impute.SimpleImputer',\n", + " 'sklearn.preprocessing.MinMaxScaler',\n", + " 'pandas.DataFrame',\n", + " 'pandas.DataFrame.set',\n", + " 'pandas.DataFrame.set',\n", + " 'mlstars.custom.timeseries_preprocessing.cutoff_window_sequences',\n", + " 'keras.Sequential.LSTMTimeSeriesRegressor']" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "pipeline.template['primitives']" ] @@ -70,7 +91,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 5, "id": "a2396b1c", "metadata": {}, "outputs": [ @@ -96,61 +117,61 @@ " \n", " \n", " turbine_id\n", - " timestamp\n", " signal_id\n", + " timestamp\n", " value\n", " \n", " \n", " \n", " \n", " 0\n", - " 1\n", - " 2013-01-12 00:10:00\n", - " operational setting 1\n", - " -0.0007\n", + " T001\n", + " S01\n", + " 2013-01-10\n", + " 323.0\n", " \n", " \n", " 1\n", - " 1\n", - " 2013-01-12 00:20:00\n", - " operational setting 1\n", - " 0.0019\n", + " T001\n", + " S02\n", + " 2013-01-10\n", + " 320.0\n", " \n", " \n", " 2\n", - " 1\n", - " 2013-01-12 00:30:00\n", - " operational setting 1\n", - " -0.0043\n", + " T001\n", + " S03\n", + " 2013-01-10\n", + " 284.0\n", " \n", " \n", " 3\n", - " 1\n", - " 2013-01-12 00:40:00\n", - " operational setting 1\n", - " 0.0007\n", + " T001\n", + " S04\n", + " 2013-01-10\n", + " 348.0\n", " \n", " \n", " 4\n", - " 1\n", - " 2013-01-12 00:50:00\n", - " operational setting 1\n", - " -0.0019\n", + " T001\n", + " S05\n", + " 2013-01-10\n", + " 273.0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " turbine_id timestamp signal_id value\n", - "0 1 2013-01-12 00:10:00 operational setting 1 -0.0007\n", - "1 1 2013-01-12 00:20:00 operational setting 1 0.0019\n", - "2 1 2013-01-12 00:30:00 operational setting 1 -0.0043\n", - "3 1 2013-01-12 00:40:00 operational setting 1 0.0007\n", - "4 1 2013-01-12 00:50:00 operational setting 1 -0.0019" + " turbine_id signal_id timestamp value\n", + "0 T001 S01 2013-01-10 323.0\n", + "1 T001 S02 2013-01-10 320.0\n", + "2 T001 S03 2013-01-10 
284.0\n", + "3 T001 S04 2013-01-10 348.0\n", + "4 T001 S05 2013-01-10 273.0" ] }, - "execution_count": 2, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -161,7 +182,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 6, "id": "3cd80f1f", "metadata": {}, "outputs": [ @@ -194,60 +215,116 @@ " \n", " \n", " 0\n", - " 1\n", - " 2013-01-12 04:20:00\n", - " 166\n", + " T001\n", + " 2013-01-12\n", + " 0\n", " \n", " \n", " 1\n", - " 1\n", - " 2013-01-12 04:30:00\n", - " 165\n", + " T001\n", + " 2013-01-13\n", + " 0\n", " \n", " \n", " 2\n", - " 1\n", - " 2013-01-12 04:40:00\n", - " 164\n", + " T001\n", + " 2013-01-14\n", + " 0\n", " \n", " \n", " 3\n", + " T001\n", + " 2013-01-15\n", " 1\n", - " 2013-01-12 04:50:00\n", - " 163\n", " \n", " \n", " 4\n", - " 1\n", - " 2013-01-12 05:00:00\n", - " 162\n", + " T001\n", + " 2013-01-16\n", + " 0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " turbine_id cutoff_time target\n", - "0 1 2013-01-12 04:20:00 166\n", - "1 1 2013-01-12 04:30:00 165\n", - "2 1 2013-01-12 04:40:00 164\n", - "3 1 2013-01-12 04:50:00 163\n", - "4 1 2013-01-12 05:00:00 162" + " turbine_id cutoff_time target\n", + "0 T001 2013-01-12 0\n", + "1 T001 2013-01-13 0\n", + "2 T001 2013-01-14 0\n", + "3 T001 2013-01-15 1\n", + "4 T001 2013-01-16 0" ] }, - "execution_count": 39, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "train_target_times.head()" + "target_times.head()" + ] + }, + { + "cell_type": "markdown", + "id": "a956f746", + "metadata": {}, + "source": [ + "## Data Preparation (part of Draco Pipeline)\n", + "\n", + "* Input: target_times, readings, turbines\n", + "* Output: X, y, readings, turbines\n", + "* Effect: target_times has been split into X and y" + ] + }, + { + "cell_type": "markdown", + "id": "a813a966", + "metadata": {}, + "source": [ + "## pandas.DataFrame.resample\n", + "\n", + "* Input: readings\n", + "* Output: readings (resampled)\n", + "* Effect: readings have been resampled to the indicated resample rule and turbine_id,\n", + " signal_id and timestamp have been set as a multi-index" ] }, { "cell_type": "code", - "execution_count": 40, - "id": "6a759b57", + "execution_count": 7, + "id": "bb00b3b8", + "metadata": {}, + "outputs": [], + "source": [ + "context = pipeline.fit(target_times, readings, output_=0)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "381e361d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'X', 'y'])" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "b41f13c1", "metadata": {}, "outputs": [ { @@ -271,111 +348,69 @@ " \n", " \n", " \n", + " \n", + " \n", + " value\n", + " \n", + " \n", " turbine_id\n", - " cutoff_time\n", - " target\n", + " signal_id\n", + " timestamp\n", + " \n", " \n", " \n", " \n", " \n", - " 0\n", - " 1\n", - " 2013-01-13 13:10:00\n", - " 112.0\n", + " T001\n", + " S01\n", + " 2013-01-10 00:00:00\n", + " 323.0\n", " \n", " \n", - " 1\n", - " 2\n", - " 2013-01-14 08:00:00\n", - " 98.0\n", + " 2013-01-10 00:10:00\n", + " 346.0\n", " \n", " \n", - " 2\n", - " 3\n", - " 2013-01-14 02:50:00\n", - " 69.0\n", + " 2013-01-10 00:20:00\n", + " 407.0\n", " \n", " \n", - " 3\n", - " 4\n", - " 2013-01-14 01:10:00\n", - " 82.0\n", + " 2013-01-10 00:30:00\n", + " 257.0\n", " \n", " \n", - " 4\n", - " 5\n", - " 
2013-01-14 13:10:00\n", - " 91.0\n", + " 2013-01-10 00:40:00\n", + " 267.0\n", " \n", " \n", "\n", "" ], "text/plain": [ - " turbine_id cutoff_time target\n", - "0 1 2013-01-13 13:10:00 112.0\n", - "1 2 2013-01-14 08:00:00 98.0\n", - "2 3 2013-01-14 02:50:00 69.0\n", - "3 4 2013-01-14 01:10:00 82.0\n", - "4 5 2013-01-14 13:10:00 91.0" + " value\n", + "turbine_id signal_id timestamp \n", + "T001 S01 2013-01-10 00:00:00 323.0\n", + " 2013-01-10 00:10:00 346.0\n", + " 2013-01-10 00:20:00 407.0\n", + " 2013-01-10 00:30:00 257.0\n", + " 2013-01-10 00:40:00 267.0" ] }, - "execution_count": 40, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "test_target_times.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "feb3daa6", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "training shape (18131, 3)\n", - "testing shape (100, 3)\n" - ] - } - ], - "source": [ - "print(\"training shape\", train_target_times.shape)\n", - "print(\"testing shape\", test_target_times.shape)" - ] - }, - { - "cell_type": "markdown", - "id": "a956f746", - "metadata": {}, - "source": [ - "## Data Preparation (part of Draco Pipeline)\n", - "\n", - "* Input: target_times, readings, turbines\n", - "* Output: X, y, readings, turbines\n", - "* Effect: target_times has been split into X and y" + "context['readings'].head()" ] }, { "cell_type": "markdown", - "id": "a813a966", + "id": "4b46bf1f", "metadata": {}, "source": [ - "## mlblocks.MLPipeline\n", - "\n", - "### pandas.DataFrame.resample\n", - "\n", - "* Input: readings\n", - "* Output: readings (resampled)\n", - "* Effect: readings have been resampled to the indicated resample rule and turbine_id,\n", - " signal_id and timestamp have been set as a multi-index\n", - " \n", - "### pandas.DataFrame.unstack\n", + "## pandas.DataFrame.unstack\n", "\n", "* Input: readings (resampled)\n", "* Output: readings (unstacked)\n", @@ -384,18 +419,19 @@ }, { "cell_type": "code", - "execution_count": 5, - "id": "bb00b3b8", + "execution_count": 10, + "id": "bb0bac75", "metadata": {}, "outputs": [], "source": [ - "context = pipeline.fit(train_target_times, readings, output_=0)" + "step = 1\n", + "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 6, - "id": "381e361d", + "execution_count": 11, + "id": "1009407e", "metadata": {}, "outputs": [ { @@ -404,7 +440,7 @@ "dict_keys(['readings', 'turbines', 'X', 'y'])" ] }, - "execution_count": 6, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -415,8 +451,8 @@ }, { "cell_type": "code", - "execution_count": 7, - "id": "b41f13c1", + "execution_count": 12, + "id": "83855579", "metadata": {}, "outputs": [ { @@ -442,228 +478,186 @@ " \n", " turbine_id\n", " timestamp\n", - " value_operational setting 1\n", - " value_operational setting 2\n", - " value_operational setting 3\n", - " value_sensor measurement 1\n", - " value_sensor measurement 10\n", - " value_sensor measurement 11\n", - " value_sensor measurement 12\n", - " value_sensor measurement 13\n", + " value_S01\n", + " value_S02\n", + " value_S03\n", + " value_S04\n", + " value_S05\n", + " value_S06\n", + " value_S07\n", + " value_S08\n", " ...\n", - " value_sensor measurement 2\n", - " value_sensor measurement 20\n", - " value_sensor measurement 21\n", - " value_sensor measurement 3\n", - " value_sensor measurement 4\n", - " value_sensor measurement 5\n", - " value_sensor measurement 6\n", - " 
value_sensor measurement 7\n", - " value_sensor measurement 8\n", - " value_sensor measurement 9\n", + " value_S17\n", + " value_S18\n", + " value_S19\n", + " value_S20\n", + " value_S21\n", + " value_S22\n", + " value_S23\n", + " value_S24\n", + " value_S25\n", + " value_S26\n", " \n", " \n", " \n", " \n", " 0\n", - " 1\n", - " 2013-01-12 00:10:00\n", - " -0.0007\n", - " -0.0004\n", - " 100.0\n", - " 518.67\n", - " 1.3\n", - " 47.47\n", - " 521.66\n", - " 2388.02\n", + " T001\n", + " 2013-01-10 00:00:00\n", + " 323.0\n", + " 320.0\n", + " 284.0\n", + " 348.0\n", + " 273.0\n", + " 342.0\n", + " 280.0\n", + " 3197842.0\n", " ...\n", - " 641.82\n", - " 39.06\n", - " 23.4190\n", - " 1589.70\n", - " 1400.60\n", - " 14.62\n", - " 21.61\n", - " 554.36\n", - " 2388.06\n", - " 9046.19\n", + " 11.7\n", + " 3131020.0\n", + " 55.0\n", + " 55.0\n", + " 47.0\n", + " 58.0\n", + " 45.0\n", + " 58.0\n", + " 47.0\n", + " 356.0\n", " \n", " \n", " 1\n", - " 1\n", - " 2013-01-12 00:20:00\n", - " 0.0019\n", - " -0.0003\n", - " 100.0\n", - " 518.67\n", - " 1.3\n", - " 47.49\n", - " 522.28\n", - " 2388.07\n", + " T001\n", + " 2013-01-10 00:10:00\n", + " 346.0\n", + " 384.0\n", + " 367.0\n", + " 411.0\n", + " 331.0\n", + " 360.0\n", + " 249.0\n", + " 3197900.0\n", " ...\n", - " 642.15\n", - " 39.00\n", - " 23.4236\n", - " 1591.82\n", - " 1403.14\n", - " 14.62\n", - " 21.61\n", - " 553.75\n", - " 2388.04\n", - " 9044.07\n", + " 10.2\n", + " 3131420.0\n", + " 58.0\n", + " 63.0\n", + " 62.0\n", + " 67.0\n", + " 55.0\n", + " 61.0\n", + " 42.0\n", + " 400.0\n", " \n", " \n", " 2\n", - " 1\n", - " 2013-01-12 00:30:00\n", - " -0.0043\n", - " 0.0003\n", - " 100.0\n", - " 518.67\n", - " 1.3\n", - " 47.27\n", - " 522.42\n", - " 2388.03\n", + " T001\n", + " 2013-01-10 00:20:00\n", + " 407.0\n", + " 363.0\n", + " 407.0\n", + " 393.0\n", + " 275.0\n", + " 335.0\n", + " 270.0\n", + " 3197968.0\n", " ...\n", - " 642.35\n", - " 38.95\n", - " 23.3442\n", - " 1587.99\n", - " 1404.20\n", - " 14.62\n", - " 21.61\n", - " 554.26\n", - " 2388.08\n", - " 9052.94\n", + " 9.5\n", + " 3131822.0\n", + " 68.0\n", + " 61.0\n", + " 67.0\n", + " 66.0\n", + " 46.0\n", + " 55.0\n", + " 45.0\n", + " 402.0\n", " \n", " \n", " 3\n", - " 1\n", - " 2013-01-12 00:40:00\n", - " 0.0007\n", - " 0.0000\n", - " 100.0\n", - " 518.67\n", - " 1.3\n", - " 47.13\n", - " 522.86\n", - " 2388.08\n", + " T001\n", + " 2013-01-10 00:30:00\n", + " 257.0\n", + " 307.0\n", + " 315.0\n", + " 361.0\n", + " 317.0\n", + " 354.0\n", + " 271.0\n", + " 3198011.0\n", " ...\n", - " 642.35\n", - " 38.88\n", - " 23.3739\n", - " 1582.79\n", - " 1401.87\n", - " 14.62\n", - " 21.61\n", - " 554.45\n", - " 2388.11\n", - " 9049.48\n", + " 10.5\n", + " 3132179.0\n", + " 43.0\n", + " 51.0\n", + " 53.0\n", + " 62.0\n", + " 53.0\n", + " 60.0\n", + " 45.0\n", + " 357.0\n", " \n", " \n", " 4\n", - " 1\n", - " 2013-01-12 00:50:00\n", - " -0.0019\n", - " -0.0002\n", - " 100.0\n", - " 518.67\n", - " 1.3\n", - " 47.28\n", - " 522.19\n", - " 2388.04\n", + " T001\n", + " 2013-01-10 00:40:00\n", + " 267.0\n", + " 309.0\n", + " 314.0\n", + " 355.0\n", + " 262.0\n", + " 246.0\n", + " 212.0\n", + " 3198056.0\n", " ...\n", - " 642.37\n", - " 38.90\n", - " 23.4044\n", - " 1582.85\n", - " 1406.22\n", - " 14.62\n", - " 21.61\n", - " 554.00\n", - " 2388.06\n", - " 9055.15\n", + " 9.6\n", + " 3132501.0\n", + " 45.0\n", + " 51.0\n", + " 54.0\n", + " 59.0\n", + " 43.0\n", + " 41.0\n", + " 36.0\n", + " 322.0\n", " \n", " \n", "\n", - "

5 rows × 26 columns

\n", + "

5 rows × 28 columns

\n", "" ], "text/plain": [ - " turbine_id timestamp value_operational setting 1 \\\n", - "0 1 2013-01-12 00:10:00 -0.0007 \n", - "1 1 2013-01-12 00:20:00 0.0019 \n", - "2 1 2013-01-12 00:30:00 -0.0043 \n", - "3 1 2013-01-12 00:40:00 0.0007 \n", - "4 1 2013-01-12 00:50:00 -0.0019 \n", - "\n", - " value_operational setting 2 value_operational setting 3 \\\n", - "0 -0.0004 100.0 \n", - "1 -0.0003 100.0 \n", - "2 0.0003 100.0 \n", - "3 0.0000 100.0 \n", - "4 -0.0002 100.0 \n", - "\n", - " value_sensor measurement 1 value_sensor measurement 10 \\\n", - "0 518.67 1.3 \n", - "1 518.67 1.3 \n", - "2 518.67 1.3 \n", - "3 518.67 1.3 \n", - "4 518.67 1.3 \n", - "\n", - " value_sensor measurement 11 value_sensor measurement 12 \\\n", - "0 47.47 521.66 \n", - "1 47.49 522.28 \n", - "2 47.27 522.42 \n", - "3 47.13 522.86 \n", - "4 47.28 522.19 \n", + " turbine_id timestamp value_S01 value_S02 value_S03 value_S04 \\\n", + "0 T001 2013-01-10 00:00:00 323.0 320.0 284.0 348.0 \n", + "1 T001 2013-01-10 00:10:00 346.0 384.0 367.0 411.0 \n", + "2 T001 2013-01-10 00:20:00 407.0 363.0 407.0 393.0 \n", + "3 T001 2013-01-10 00:30:00 257.0 307.0 315.0 361.0 \n", + "4 T001 2013-01-10 00:40:00 267.0 309.0 314.0 355.0 \n", "\n", - " value_sensor measurement 13 ... value_sensor measurement 2 \\\n", - "0 2388.02 ... 641.82 \n", - "1 2388.07 ... 642.15 \n", - "2 2388.03 ... 642.35 \n", - "3 2388.08 ... 642.35 \n", - "4 2388.04 ... 642.37 \n", + " value_S05 value_S06 value_S07 value_S08 ... value_S17 value_S18 \\\n", + "0 273.0 342.0 280.0 3197842.0 ... 11.7 3131020.0 \n", + "1 331.0 360.0 249.0 3197900.0 ... 10.2 3131420.0 \n", + "2 275.0 335.0 270.0 3197968.0 ... 9.5 3131822.0 \n", + "3 317.0 354.0 271.0 3198011.0 ... 10.5 3132179.0 \n", + "4 262.0 246.0 212.0 3198056.0 ... 9.6 3132501.0 \n", "\n", - " value_sensor measurement 20 value_sensor measurement 21 \\\n", - "0 39.06 23.4190 \n", - "1 39.00 23.4236 \n", - "2 38.95 23.3442 \n", - "3 38.88 23.3739 \n", - "4 38.90 23.4044 \n", + " value_S19 value_S20 value_S21 value_S22 value_S23 value_S24 \\\n", + "0 55.0 55.0 47.0 58.0 45.0 58.0 \n", + "1 58.0 63.0 62.0 67.0 55.0 61.0 \n", + "2 68.0 61.0 67.0 66.0 46.0 55.0 \n", + "3 43.0 51.0 53.0 62.0 53.0 60.0 \n", + "4 45.0 51.0 54.0 59.0 43.0 41.0 \n", "\n", - " value_sensor measurement 3 value_sensor measurement 4 \\\n", - "0 1589.70 1400.60 \n", - "1 1591.82 1403.14 \n", - "2 1587.99 1404.20 \n", - "3 1582.79 1401.87 \n", - "4 1582.85 1406.22 \n", + " value_S25 value_S26 \n", + "0 47.0 356.0 \n", + "1 42.0 400.0 \n", + "2 45.0 402.0 \n", + "3 45.0 357.0 \n", + "4 36.0 322.0 \n", "\n", - " value_sensor measurement 5 value_sensor measurement 6 \\\n", - "0 14.62 21.61 \n", - "1 14.62 21.61 \n", - "2 14.62 21.61 \n", - "3 14.62 21.61 \n", - "4 14.62 21.61 \n", - "\n", - " value_sensor measurement 7 value_sensor measurement 8 \\\n", - "0 554.36 2388.06 \n", - "1 553.75 2388.04 \n", - "2 554.26 2388.08 \n", - "3 554.45 2388.11 \n", - "4 554.00 2388.06 \n", - "\n", - " value_sensor measurement 9 \n", - "0 9046.19 \n", - "1 9044.07 \n", - "2 9052.94 \n", - "3 9049.48 \n", - "4 9055.15 \n", - "\n", - "[5 rows x 26 columns]" + "[5 rows x 28 columns]" ] }, - "execution_count": 7, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -686,19 +680,19 @@ }, { "cell_type": "code", - "execution_count": 8, - "id": "bb0bac75", + "execution_count": 13, + "id": "6a422d33", "metadata": {}, "outputs": [], "source": [ - "step = 1\n", + "step = 2\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { 
"cell_type": "code", - "execution_count": 9, - "id": "1009407e", + "execution_count": 14, + "id": "334867d7", "metadata": {}, "outputs": [ { @@ -707,7 +701,7 @@ "dict_keys(['readings', 'turbines', 'X', 'y', 'turbine_id'])" ] }, - "execution_count": 9, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -718,22 +712,22 @@ }, { "cell_type": "code", - "execution_count": 10, - "id": "93104c3b", + "execution_count": 15, + "id": "df9dbf59", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0 1\n", - "1 1\n", - "2 1\n", - "3 1\n", - "4 1\n", - "Name: turbine_id, dtype: int64" + "0 T001\n", + "1 T001\n", + "2 T001\n", + "3 T001\n", + "4 T001\n", + "Name: turbine_id, dtype: object" ] }, - "execution_count": 10, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -744,8 +738,8 @@ }, { "cell_type": "code", - "execution_count": 11, - "id": "83855579", + "execution_count": 16, + "id": "7891112d", "metadata": {}, "outputs": [ { @@ -770,229 +764,187 @@ " \n", " \n", " timestamp\n", - " value_operational setting 1\n", - " value_operational setting 2\n", - " value_operational setting 3\n", - " value_sensor measurement 1\n", - " value_sensor measurement 10\n", - " value_sensor measurement 11\n", - " value_sensor measurement 12\n", - " value_sensor measurement 13\n", - " value_sensor measurement 14\n", + " value_S01\n", + " value_S02\n", + " value_S03\n", + " value_S04\n", + " value_S05\n", + " value_S06\n", + " value_S07\n", + " value_S08\n", + " value_S09\n", " ...\n", - " value_sensor measurement 2\n", - " value_sensor measurement 20\n", - " value_sensor measurement 21\n", - " value_sensor measurement 3\n", - " value_sensor measurement 4\n", - " value_sensor measurement 5\n", - " value_sensor measurement 6\n", - " value_sensor measurement 7\n", - " value_sensor measurement 8\n", - " value_sensor measurement 9\n", + " value_S17\n", + " value_S18\n", + " value_S19\n", + " value_S20\n", + " value_S21\n", + " value_S22\n", + " value_S23\n", + " value_S24\n", + " value_S25\n", + " value_S26\n", " \n", " \n", " \n", " \n", " 0\n", - " 2013-01-12 00:10:00\n", - " -0.0007\n", - " -0.0004\n", - " 100.0\n", - " 518.67\n", - " 1.3\n", - " 47.47\n", - " 521.66\n", - " 2388.02\n", - " 8138.62\n", + " 2013-01-10 00:00:00\n", + " 323.0\n", + " 320.0\n", + " 284.0\n", + " 348.0\n", + " 273.0\n", + " 342.0\n", + " 280.0\n", + " 3197842.0\n", + " 695000.0\n", " ...\n", - " 641.82\n", - " 39.06\n", - " 23.4190\n", - " 1589.70\n", - " 1400.60\n", - " 14.62\n", - " 21.61\n", - " 554.36\n", - " 2388.06\n", - " 9046.19\n", + " 11.7\n", + " 3131020.0\n", + " 55.0\n", + " 55.0\n", + " 47.0\n", + " 58.0\n", + " 45.0\n", + " 58.0\n", + " 47.0\n", + " 356.0\n", " \n", " \n", " 1\n", - " 2013-01-12 00:20:00\n", - " 0.0019\n", - " -0.0003\n", - " 100.0\n", - " 518.67\n", - " 1.3\n", - " 47.49\n", - " 522.28\n", - " 2388.07\n", - " 8131.49\n", + " 2013-01-10 00:10:00\n", + " 346.0\n", + " 384.0\n", + " 367.0\n", + " 411.0\n", + " 331.0\n", + " 360.0\n", + " 249.0\n", + " 3197900.0\n", + " 695063.0\n", " ...\n", - " 642.15\n", - " 39.00\n", - " 23.4236\n", - " 1591.82\n", - " 1403.14\n", - " 14.62\n", - " 21.61\n", - " 553.75\n", - " 2388.04\n", - " 9044.07\n", + " 10.2\n", + " 3131420.0\n", + " 58.0\n", + " 63.0\n", + " 62.0\n", + " 67.0\n", + " 55.0\n", + " 61.0\n", + " 42.0\n", + " 400.0\n", " \n", " \n", " 2\n", - " 2013-01-12 00:30:00\n", - " -0.0043\n", - " 0.0003\n", - " 100.0\n", - " 518.67\n", - " 1.3\n", - " 47.27\n", - " 522.42\n", - " 2388.03\n", - " 8133.23\n", + 
" 2013-01-10 00:20:00\n", + " 407.0\n", + " 363.0\n", + " 407.0\n", + " 393.0\n", + " 275.0\n", + " 335.0\n", + " 270.0\n", + " 3197968.0\n", + " 695124.0\n", " ...\n", - " 642.35\n", - " 38.95\n", - " 23.3442\n", - " 1587.99\n", - " 1404.20\n", - " 14.62\n", - " 21.61\n", - " 554.26\n", - " 2388.08\n", - " 9052.94\n", + " 9.5\n", + " 3131822.0\n", + " 68.0\n", + " 61.0\n", + " 67.0\n", + " 66.0\n", + " 46.0\n", + " 55.0\n", + " 45.0\n", + " 402.0\n", " \n", " \n", " 3\n", - " 2013-01-12 00:40:00\n", - " 0.0007\n", - " 0.0000\n", - " 100.0\n", - " 518.67\n", - " 1.3\n", - " 47.13\n", - " 522.86\n", - " 2388.08\n", - " 8133.83\n", + " 2013-01-10 00:30:00\n", + " 257.0\n", + " 307.0\n", + " 315.0\n", + " 361.0\n", + " 317.0\n", + " 354.0\n", + " 271.0\n", + " 3198011.0\n", + " 695175.0\n", " ...\n", - " 642.35\n", - " 38.88\n", - " 23.3739\n", - " 1582.79\n", - " 1401.87\n", - " 14.62\n", - " 21.61\n", - " 554.45\n", - " 2388.11\n", - " 9049.48\n", + " 10.5\n", + " 3132179.0\n", + " 43.0\n", + " 51.0\n", + " 53.0\n", + " 62.0\n", + " 53.0\n", + " 60.0\n", + " 45.0\n", + " 357.0\n", " \n", " \n", " 4\n", - " 2013-01-12 00:50:00\n", - " -0.0019\n", - " -0.0002\n", - " 100.0\n", - " 518.67\n", - " 1.3\n", - " 47.28\n", - " 522.19\n", - " 2388.04\n", - " 8133.80\n", + " 2013-01-10 00:40:00\n", + " 267.0\n", + " 309.0\n", + " 314.0\n", + " 355.0\n", + " 262.0\n", + " 246.0\n", + " 212.0\n", + " 3198056.0\n", + " 695226.0\n", " ...\n", - " 642.37\n", - " 38.90\n", - " 23.4044\n", - " 1582.85\n", - " 1406.22\n", - " 14.62\n", - " 21.61\n", - " 554.00\n", - " 2388.06\n", - " 9055.15\n", + " 9.6\n", + " 3132501.0\n", + " 45.0\n", + " 51.0\n", + " 54.0\n", + " 59.0\n", + " 43.0\n", + " 41.0\n", + " 36.0\n", + " 322.0\n", " \n", " \n", "\n", - "

5 rows × 25 columns

\n", + "

5 rows × 27 columns

\n", "" ], "text/plain": [ - " timestamp value_operational setting 1 \\\n", - "0 2013-01-12 00:10:00 -0.0007 \n", - "1 2013-01-12 00:20:00 0.0019 \n", - "2 2013-01-12 00:30:00 -0.0043 \n", - "3 2013-01-12 00:40:00 0.0007 \n", - "4 2013-01-12 00:50:00 -0.0019 \n", - "\n", - " value_operational setting 2 value_operational setting 3 \\\n", - "0 -0.0004 100.0 \n", - "1 -0.0003 100.0 \n", - "2 0.0003 100.0 \n", - "3 0.0000 100.0 \n", - "4 -0.0002 100.0 \n", - "\n", - " value_sensor measurement 1 value_sensor measurement 10 \\\n", - "0 518.67 1.3 \n", - "1 518.67 1.3 \n", - "2 518.67 1.3 \n", - "3 518.67 1.3 \n", - "4 518.67 1.3 \n", - "\n", - " value_sensor measurement 11 value_sensor measurement 12 \\\n", - "0 47.47 521.66 \n", - "1 47.49 522.28 \n", - "2 47.27 522.42 \n", - "3 47.13 522.86 \n", - "4 47.28 522.19 \n", - "\n", - " value_sensor measurement 13 value_sensor measurement 14 ... \\\n", - "0 2388.02 8138.62 ... \n", - "1 2388.07 8131.49 ... \n", - "2 2388.03 8133.23 ... \n", - "3 2388.08 8133.83 ... \n", - "4 2388.04 8133.80 ... \n", - "\n", - " value_sensor measurement 2 value_sensor measurement 20 \\\n", - "0 641.82 39.06 \n", - "1 642.15 39.00 \n", - "2 642.35 38.95 \n", - "3 642.35 38.88 \n", - "4 642.37 38.90 \n", + " timestamp value_S01 value_S02 value_S03 value_S04 value_S05 \\\n", + "0 2013-01-10 00:00:00 323.0 320.0 284.0 348.0 273.0 \n", + "1 2013-01-10 00:10:00 346.0 384.0 367.0 411.0 331.0 \n", + "2 2013-01-10 00:20:00 407.0 363.0 407.0 393.0 275.0 \n", + "3 2013-01-10 00:30:00 257.0 307.0 315.0 361.0 317.0 \n", + "4 2013-01-10 00:40:00 267.0 309.0 314.0 355.0 262.0 \n", "\n", - " value_sensor measurement 21 value_sensor measurement 3 \\\n", - "0 23.4190 1589.70 \n", - "1 23.4236 1591.82 \n", - "2 23.3442 1587.99 \n", - "3 23.3739 1582.79 \n", - "4 23.4044 1582.85 \n", + " value_S06 value_S07 value_S08 value_S09 ... value_S17 value_S18 \\\n", + "0 342.0 280.0 3197842.0 695000.0 ... 11.7 3131020.0 \n", + "1 360.0 249.0 3197900.0 695063.0 ... 10.2 3131420.0 \n", + "2 335.0 270.0 3197968.0 695124.0 ... 9.5 3131822.0 \n", + "3 354.0 271.0 3198011.0 695175.0 ... 10.5 3132179.0 \n", + "4 246.0 212.0 3198056.0 695226.0 ... 
9.6 3132501.0 \n", "\n", - " value_sensor measurement 4 value_sensor measurement 5 \\\n", - "0 1400.60 14.62 \n", - "1 1403.14 14.62 \n", - "2 1404.20 14.62 \n", - "3 1401.87 14.62 \n", - "4 1406.22 14.62 \n", + " value_S19 value_S20 value_S21 value_S22 value_S23 value_S24 \\\n", + "0 55.0 55.0 47.0 58.0 45.0 58.0 \n", + "1 58.0 63.0 62.0 67.0 55.0 61.0 \n", + "2 68.0 61.0 67.0 66.0 46.0 55.0 \n", + "3 43.0 51.0 53.0 62.0 53.0 60.0 \n", + "4 45.0 51.0 54.0 59.0 43.0 41.0 \n", "\n", - " value_sensor measurement 6 value_sensor measurement 7 \\\n", - "0 21.61 554.36 \n", - "1 21.61 553.75 \n", - "2 21.61 554.26 \n", - "3 21.61 554.45 \n", - "4 21.61 554.00 \n", + " value_S25 value_S26 \n", + "0 47.0 356.0 \n", + "1 42.0 400.0 \n", + "2 45.0 402.0 \n", + "3 45.0 357.0 \n", + "4 36.0 322.0 \n", "\n", - " value_sensor measurement 8 value_sensor measurement 9 \n", - "0 2388.06 9046.19 \n", - "1 2388.04 9044.07 \n", - "2 2388.08 9052.94 \n", - "3 2388.11 9049.48 \n", - "4 2388.06 9055.15 \n", - "\n", - "[5 rows x 25 columns]" + "[5 rows x 27 columns]" ] }, - "execution_count": 11, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -1015,18 +967,18 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 17, "id": "ebcad5cd", "metadata": {}, "outputs": [], "source": [ - "step = 2\n", + "step = 3\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 18, "id": "d497ab07", "metadata": {}, "outputs": [ @@ -1036,7 +988,7 @@ "dict_keys(['readings', 'turbines', 'turbine_id', 'X', 'y', 'timestamp'])" ] }, - "execution_count": 13, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -1047,22 +999,22 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 19, "id": "2c3bfa0b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0 2013-01-12 00:10:00\n", - "1 2013-01-12 00:20:00\n", - "2 2013-01-12 00:30:00\n", - "3 2013-01-12 00:40:00\n", - "4 2013-01-12 00:50:00\n", + "0 2013-01-10 00:00:00\n", + "1 2013-01-10 00:10:00\n", + "2 2013-01-10 00:20:00\n", + "3 2013-01-10 00:30:00\n", + "4 2013-01-10 00:40:00\n", "Name: timestamp, dtype: datetime64[ns]" ] }, - "execution_count": 14, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -1073,7 +1025,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 20, "id": "3c837b44", "metadata": {}, "outputs": [ @@ -1098,230 +1050,188 @@ " \n", " \n", " \n", - " value_operational setting 1\n", - " value_operational setting 2\n", - " value_operational setting 3\n", - " value_sensor measurement 1\n", - " value_sensor measurement 10\n", - " value_sensor measurement 11\n", - " value_sensor measurement 12\n", - " value_sensor measurement 13\n", - " value_sensor measurement 14\n", - " value_sensor measurement 15\n", + " value_S01\n", + " value_S02\n", + " value_S03\n", + " value_S04\n", + " value_S05\n", + " value_S06\n", + " value_S07\n", + " value_S08\n", + " value_S09\n", + " value_S10\n", " ...\n", - " value_sensor measurement 2\n", - " value_sensor measurement 20\n", - " value_sensor measurement 21\n", - " value_sensor measurement 3\n", - " value_sensor measurement 4\n", - " value_sensor measurement 5\n", - " value_sensor measurement 6\n", - " value_sensor measurement 7\n", - " value_sensor measurement 8\n", - " value_sensor measurement 9\n", + " value_S17\n", + " value_S18\n", + " value_S19\n", + " value_S20\n", + " value_S21\n", + " 
value_S22\n", + " value_S23\n", + " value_S24\n", + " value_S25\n", + " value_S26\n", " \n", " \n", " \n", " \n", " 0\n", - " -0.0007\n", - " -0.0004\n", - " 100.0\n", - " 518.67\n", - " 1.3\n", - " 47.47\n", - " 521.66\n", - " 2388.02\n", - " 8138.62\n", - " 8.4195\n", + " 323.0\n", + " 320.0\n", + " 284.0\n", + " 348.0\n", + " 273.0\n", + " 342.0\n", + " 280.0\n", + " 3197842.0\n", + " 695000.0\n", + " 3348234.0\n", " ...\n", - " 641.82\n", - " 39.06\n", - " 23.4190\n", - " 1589.70\n", - " 1400.60\n", - " 14.62\n", - " 21.61\n", - " 554.36\n", - " 2388.06\n", - " 9046.19\n", + " 11.7\n", + " 3131020.0\n", + " 55.0\n", + " 55.0\n", + " 47.0\n", + " 58.0\n", + " 45.0\n", + " 58.0\n", + " 47.0\n", + " 356.0\n", " \n", " \n", " 1\n", - " 0.0019\n", - " -0.0003\n", - " 100.0\n", - " 518.67\n", - " 1.3\n", - " 47.49\n", - " 522.28\n", - " 2388.07\n", - " 8131.49\n", - " 8.4318\n", + " 346.0\n", + " 384.0\n", + " 367.0\n", + " 411.0\n", + " 331.0\n", + " 360.0\n", + " 249.0\n", + " 3197900.0\n", + " 695063.0\n", + " 3348296.0\n", " ...\n", - " 642.15\n", - " 39.00\n", - " 23.4236\n", - " 1591.82\n", - " 1403.14\n", - " 14.62\n", - " 21.61\n", - " 553.75\n", - " 2388.04\n", - " 9044.07\n", + " 10.2\n", + " 3131420.0\n", + " 58.0\n", + " 63.0\n", + " 62.0\n", + " 67.0\n", + " 55.0\n", + " 61.0\n", + " 42.0\n", + " 400.0\n", " \n", " \n", " 2\n", - " -0.0043\n", - " 0.0003\n", - " 100.0\n", - " 518.67\n", - " 1.3\n", - " 47.27\n", - " 522.42\n", - " 2388.03\n", - " 8133.23\n", - " 8.4178\n", + " 407.0\n", + " 363.0\n", + " 407.0\n", + " 393.0\n", + " 275.0\n", + " 335.0\n", + " 270.0\n", + " 3197968.0\n", + " 695124.0\n", + " 3348363.0\n", " ...\n", - " 642.35\n", - " 38.95\n", - " 23.3442\n", - " 1587.99\n", - " 1404.20\n", - " 14.62\n", - " 21.61\n", - " 554.26\n", - " 2388.08\n", - " 9052.94\n", + " 9.5\n", + " 3131822.0\n", + " 68.0\n", + " 61.0\n", + " 67.0\n", + " 66.0\n", + " 46.0\n", + " 55.0\n", + " 45.0\n", + " 402.0\n", " \n", " \n", " 3\n", - " 0.0007\n", - " 0.0000\n", - " 100.0\n", - " 518.67\n", - " 1.3\n", - " 47.13\n", - " 522.86\n", - " 2388.08\n", - " 8133.83\n", - " 8.3682\n", + " 257.0\n", + " 307.0\n", + " 315.0\n", + " 361.0\n", + " 317.0\n", + " 354.0\n", + " 271.0\n", + " 3198011.0\n", + " 695175.0\n", + " 3348416.0\n", " ...\n", - " 642.35\n", - " 38.88\n", - " 23.3739\n", - " 1582.79\n", - " 1401.87\n", - " 14.62\n", - " 21.61\n", - " 554.45\n", - " 2388.11\n", - " 9049.48\n", + " 10.5\n", + " 3132179.0\n", + " 43.0\n", + " 51.0\n", + " 53.0\n", + " 62.0\n", + " 53.0\n", + " 60.0\n", + " 45.0\n", + " 357.0\n", " \n", " \n", " 4\n", - " -0.0019\n", - " -0.0002\n", - " 100.0\n", - " 518.67\n", - " 1.3\n", - " 47.28\n", - " 522.19\n", - " 2388.04\n", - " 8133.80\n", - " 8.4294\n", + " 267.0\n", + " 309.0\n", + " 314.0\n", + " 355.0\n", + " 262.0\n", + " 246.0\n", + " 212.0\n", + " 3198056.0\n", + " 695226.0\n", + " 3348470.0\n", " ...\n", - " 642.37\n", - " 38.90\n", - " 23.4044\n", - " 1582.85\n", - " 1406.22\n", - " 14.62\n", - " 21.61\n", - " 554.00\n", - " 2388.06\n", - " 9055.15\n", + " 9.6\n", + " 3132501.0\n", + " 45.0\n", + " 51.0\n", + " 54.0\n", + " 59.0\n", + " 43.0\n", + " 41.0\n", + " 36.0\n", + " 322.0\n", " \n", " \n", "\n", - "

5 rows × 24 columns

\n", + "

5 rows × 26 columns

\n", "" ], "text/plain": [ - " value_operational setting 1 value_operational setting 2 \\\n", - "0 -0.0007 -0.0004 \n", - "1 0.0019 -0.0003 \n", - "2 -0.0043 0.0003 \n", - "3 0.0007 0.0000 \n", - "4 -0.0019 -0.0002 \n", - "\n", - " value_operational setting 3 value_sensor measurement 1 \\\n", - "0 100.0 518.67 \n", - "1 100.0 518.67 \n", - "2 100.0 518.67 \n", - "3 100.0 518.67 \n", - "4 100.0 518.67 \n", - "\n", - " value_sensor measurement 10 value_sensor measurement 11 \\\n", - "0 1.3 47.47 \n", - "1 1.3 47.49 \n", - "2 1.3 47.27 \n", - "3 1.3 47.13 \n", - "4 1.3 47.28 \n", - "\n", - " value_sensor measurement 12 value_sensor measurement 13 \\\n", - "0 521.66 2388.02 \n", - "1 522.28 2388.07 \n", - "2 522.42 2388.03 \n", - "3 522.86 2388.08 \n", - "4 522.19 2388.04 \n", - "\n", - " value_sensor measurement 14 value_sensor measurement 15 ... \\\n", - "0 8138.62 8.4195 ... \n", - "1 8131.49 8.4318 ... \n", - "2 8133.23 8.4178 ... \n", - "3 8133.83 8.3682 ... \n", - "4 8133.80 8.4294 ... \n", - "\n", - " value_sensor measurement 2 value_sensor measurement 20 \\\n", - "0 641.82 39.06 \n", - "1 642.15 39.00 \n", - "2 642.35 38.95 \n", - "3 642.35 38.88 \n", - "4 642.37 38.90 \n", + " value_S01 value_S02 value_S03 value_S04 value_S05 value_S06 \\\n", + "0 323.0 320.0 284.0 348.0 273.0 342.0 \n", + "1 346.0 384.0 367.0 411.0 331.0 360.0 \n", + "2 407.0 363.0 407.0 393.0 275.0 335.0 \n", + "3 257.0 307.0 315.0 361.0 317.0 354.0 \n", + "4 267.0 309.0 314.0 355.0 262.0 246.0 \n", "\n", - " value_sensor measurement 21 value_sensor measurement 3 \\\n", - "0 23.4190 1589.70 \n", - "1 23.4236 1591.82 \n", - "2 23.3442 1587.99 \n", - "3 23.3739 1582.79 \n", - "4 23.4044 1582.85 \n", + " value_S07 value_S08 value_S09 value_S10 ... value_S17 value_S18 \\\n", + "0 280.0 3197842.0 695000.0 3348234.0 ... 11.7 3131020.0 \n", + "1 249.0 3197900.0 695063.0 3348296.0 ... 10.2 3131420.0 \n", + "2 270.0 3197968.0 695124.0 3348363.0 ... 9.5 3131822.0 \n", + "3 271.0 3198011.0 695175.0 3348416.0 ... 10.5 3132179.0 \n", + "4 212.0 3198056.0 695226.0 3348470.0 ... 
9.6 3132501.0 \n", "\n", - " value_sensor measurement 4 value_sensor measurement 5 \\\n", - "0 1400.60 14.62 \n", - "1 1403.14 14.62 \n", - "2 1404.20 14.62 \n", - "3 1401.87 14.62 \n", - "4 1406.22 14.62 \n", + " value_S19 value_S20 value_S21 value_S22 value_S23 value_S24 \\\n", + "0 55.0 55.0 47.0 58.0 45.0 58.0 \n", + "1 58.0 63.0 62.0 67.0 55.0 61.0 \n", + "2 68.0 61.0 67.0 66.0 46.0 55.0 \n", + "3 43.0 51.0 53.0 62.0 53.0 60.0 \n", + "4 45.0 51.0 54.0 59.0 43.0 41.0 \n", "\n", - " value_sensor measurement 6 value_sensor measurement 7 \\\n", - "0 21.61 554.36 \n", - "1 21.61 553.75 \n", - "2 21.61 554.26 \n", - "3 21.61 554.45 \n", - "4 21.61 554.00 \n", + " value_S25 value_S26 \n", + "0 47.0 356.0 \n", + "1 42.0 400.0 \n", + "2 45.0 402.0 \n", + "3 45.0 357.0 \n", + "4 36.0 322.0 \n", "\n", - " value_sensor measurement 8 value_sensor measurement 9 \n", - "0 2388.06 9046.19 \n", - "1 2388.04 9044.07 \n", - "2 2388.08 9052.94 \n", - "3 2388.11 9049.48 \n", - "4 2388.06 9055.15 \n", - "\n", - "[5 rows x 24 columns]" + "[5 rows x 26 columns]" ] }, - "execution_count": 15, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -1344,18 +1254,27 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 21, "id": "3ad08e01", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/sarah/anaconda3/envs/draco/lib/python3.8/site-packages/sklearn/impute/_base.py:356: FutureWarning: The 'verbose' parameter was deprecated in version 1.1 and will be removed in 1.3. A warning will always be raised upon the removal of empty columns in the future version.\n", + " warnings.warn(\n" + ] + } + ], "source": [ - "step = 3\n", + "step = 4\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 22, "id": "19c4ee50", "metadata": {}, "outputs": [ @@ -1365,7 +1284,7 @@ "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" ] }, - "execution_count": 17, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -1376,46 +1295,51 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 23, "id": "af5f9dc1", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([[-7.00000e-04, -4.00000e-04, 1.00000e+02, 5.18670e+02,\n", - " 1.30000e+00, 4.74700e+01, 5.21660e+02, 2.38802e+03,\n", - " 8.13862e+03, 8.41950e+00, 3.00000e-02, 3.92000e+02,\n", - " 2.38800e+03, 1.00000e+02, 6.41820e+02, 3.90600e+01,\n", - " 2.34190e+01, 1.58970e+03, 1.40060e+03, 1.46200e+01,\n", - " 2.16100e+01, 5.54360e+02, 2.38806e+03, 9.04619e+03],\n", - " [ 1.90000e-03, -3.00000e-04, 1.00000e+02, 5.18670e+02,\n", - " 1.30000e+00, 4.74900e+01, 5.22280e+02, 2.38807e+03,\n", - " 8.13149e+03, 8.43180e+00, 3.00000e-02, 3.92000e+02,\n", - " 2.38800e+03, 1.00000e+02, 6.42150e+02, 3.90000e+01,\n", - " 2.34236e+01, 1.59182e+03, 1.40314e+03, 1.46200e+01,\n", - " 2.16100e+01, 5.53750e+02, 2.38804e+03, 9.04407e+03],\n", - " [-4.30000e-03, 3.00000e-04, 1.00000e+02, 5.18670e+02,\n", - " 1.30000e+00, 4.72700e+01, 5.22420e+02, 2.38803e+03,\n", - " 8.13323e+03, 8.41780e+00, 3.00000e-02, 3.90000e+02,\n", - " 2.38800e+03, 1.00000e+02, 6.42350e+02, 3.89500e+01,\n", - " 2.33442e+01, 1.58799e+03, 1.40420e+03, 1.46200e+01,\n", - " 2.16100e+01, 5.54260e+02, 2.38808e+03, 9.05294e+03],\n", - " [ 7.00000e-04, 0.00000e+00, 1.00000e+02, 5.18670e+02,\n", - " 1.30000e+00, 4.71300e+01, 5.22860e+02, 2.38808e+03,\n", - " 
8.13383e+03, 8.36820e+00, 3.00000e-02, 3.92000e+02,\n", - " 2.38800e+03, 1.00000e+02, 6.42350e+02, 3.88800e+01,\n", - " 2.33739e+01, 1.58279e+03, 1.40187e+03, 1.46200e+01,\n", - " 2.16100e+01, 5.54450e+02, 2.38811e+03, 9.04948e+03],\n", - " [-1.90000e-03, -2.00000e-04, 1.00000e+02, 5.18670e+02,\n", - " 1.30000e+00, 4.72800e+01, 5.22190e+02, 2.38804e+03,\n", - " 8.13380e+03, 8.42940e+00, 3.00000e-02, 3.93000e+02,\n", - " 2.38800e+03, 1.00000e+02, 6.42370e+02, 3.89000e+01,\n", - " 2.34044e+01, 1.58285e+03, 1.40622e+03, 1.46200e+01,\n", - " 2.16100e+01, 5.54000e+02, 2.38806e+03, 9.05515e+03]])" + "array([[3.230000e+02, 3.200000e+02, 2.840000e+02, 3.480000e+02,\n", + " 2.730000e+02, 3.420000e+02, 2.800000e+02, 3.197842e+06,\n", + " 6.950000e+05, 3.348234e+06, 3.436762e+06, 3.322362e+06,\n", + " 3.357952e+06, 3.223797e+06, 8.300000e+00, 6.000000e+00,\n", + " 1.170000e+01, 3.131020e+06, 5.500000e+01, 5.500000e+01,\n", + " 4.700000e+01, 5.800000e+01, 4.500000e+01, 5.800000e+01,\n", + " 4.700000e+01, 3.560000e+02],\n", + " [3.460000e+02, 3.840000e+02, 3.670000e+02, 4.110000e+02,\n", + " 3.310000e+02, 3.600000e+02, 2.490000e+02, 3.197900e+06,\n", + " 6.950630e+05, 3.348296e+06, 3.436829e+06, 3.322417e+06,\n", + " 3.358013e+06, 3.223839e+06, 7.600000e+00, 5.000000e+00,\n", + " 1.020000e+01, 3.131420e+06, 5.800000e+01, 6.300000e+01,\n", + " 6.200000e+01, 6.700000e+01, 5.500000e+01, 6.100000e+01,\n", + " 4.200000e+01, 4.000000e+02],\n", + " [4.070000e+02, 3.630000e+02, 4.070000e+02, 3.930000e+02,\n", + " 2.750000e+02, 3.350000e+02, 2.700000e+02, 3.197968e+06,\n", + " 6.951240e+05, 3.348363e+06, 3.436895e+06, 3.322463e+06,\n", + " 3.358068e+06, 3.223884e+06, 7.800000e+00, 5.700000e+00,\n", + " 9.500000e+00, 3.131822e+06, 6.800000e+01, 6.100000e+01,\n", + " 6.700000e+01, 6.600000e+01, 4.600000e+01, 5.500000e+01,\n", + " 4.500000e+01, 4.020000e+02],\n", + " [2.570000e+02, 3.070000e+02, 3.150000e+02, 3.610000e+02,\n", + " 3.170000e+02, 3.540000e+02, 2.710000e+02, 3.198011e+06,\n", + " 6.951750e+05, 3.348416e+06, 3.436957e+06, 3.322516e+06,\n", + " 3.358128e+06, 3.223929e+06, 8.600000e+00, 6.600000e+00,\n", + " 1.050000e+01, 3.132179e+06, 4.300000e+01, 5.100000e+01,\n", + " 5.300000e+01, 6.200000e+01, 5.300000e+01, 6.000000e+01,\n", + " 4.500000e+01, 3.570000e+02],\n", + " [2.670000e+02, 3.090000e+02, 3.140000e+02, 3.550000e+02,\n", + " 2.620000e+02, 2.460000e+02, 2.120000e+02, 3.198056e+06,\n", + " 6.952260e+05, 3.348470e+06, 3.437016e+06, 3.322559e+06,\n", + " 3.358169e+06, 3.223965e+06, 7.500000e+00, 5.900000e+00,\n", + " 9.600000e+00, 3.132501e+06, 4.500000e+01, 5.100000e+01,\n", + " 5.400000e+01, 5.900000e+01, 4.300000e+01, 4.100000e+01,\n", + " 3.600000e+01, 3.220000e+02]])" ] }, - "execution_count": 18, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -1438,18 +1362,18 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 24, "id": "f50662d2", "metadata": {}, "outputs": [], "source": [ - "step = 4\n", + "step = 5\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 25, "id": "37bf8d65", "metadata": {}, "outputs": [ @@ -1459,7 +1383,7 @@ "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" ] }, - "execution_count": 20, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -1470,41 +1394,46 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 26, "id": "73c5d941", "metadata": {}, "outputs": [ { 
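The array displayed next is the readings matrix after imputation and rescaling into [-1, 1]. As a standalone sketch of what the two sklearn steps do (SimpleImputer at step 4, MinMaxScaler at step 5) — toy values for illustration, not the pipeline's exact code:

    import numpy as np
    from sklearn.impute import SimpleImputer
    from sklearn.preprocessing import MinMaxScaler

    # Stand-in for X.values: a small readings matrix with one missing entry.
    X = np.array([[323.0, np.nan],
                  [346.0, 3197900.0],
                  [407.0, 3197968.0]])

    X = SimpleImputer().fit_transform(X)                      # fills the NaN with the column mean
    X = MinMaxScaler(feature_range=(-1, 1)).fit_transform(X)  # maps every column onto [-1, 1]
    print(X)  # each column now spans exactly [-1, 1], as in the output below
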
"data": { "text/plain": [ - "array([[-0.08045977, -0.69230769, -1. , -1. , -1. ,\n", - " -0.22543353, 0.17159763, -0.58823529, -0.60078439, -0.2720277 ,\n", - " -1. , -0.33333333, -1. , -1. , -0.59411765,\n", - " 0.42635659, 0.40377157, -0.13682891, -0.38048616, -1. ,\n", - " 1. , 0.45249597, -0.49253731, -0.78048999],\n", - " [ 0.2183908 , -0.53846154, -1. , -1. , -1. ,\n", - " -0.20231214, 0.41617357, -0.44117647, -0.674373 , -0.17737591,\n", - " -1. , -0.33333333, -1. , -1. , -0.4 ,\n", - " 0.33333333, 0.41607597, -0.04825569, -0.29473329, -1. ,\n", - " 1. , 0.25603865, -0.55223881, -0.79951539],\n", - " [-0.49425287, 0.38461538, -1. , -1. , -1. ,\n", - " -0.4566474 , 0.47140039, -0.55882353, -0.65641449, -0.28510966,\n", - " -1. , -0.66666667, -1. , -1. , -0.28235294,\n", - " 0.25581395, 0.20369132, -0.2082724 , -0.25894666, -1. ,\n", - " 1. , 0.42028986, -0.43283582, -0.71991385],\n", - " [ 0.08045977, -0.07692308, -1. , -1. , -1. ,\n", - " -0.61849711, 0.64497041, -0.41176471, -0.6502219 , -0.66679492,\n", - " -1. , -0.33333333, -1. , -1. , -0.28235294,\n", - " 0.14728682, 0.28313495, -0.42552747, -0.33760972, -1. ,\n", - " 1. , 0.48148148, -0.34328358, -0.75096473],\n", - " [-0.2183908 , -0.38461538, -1. , -1. , -1. ,\n", - " -0.44508671, 0.38067061, -0.52941176, -0.65053153, -0.19584456,\n", - " -1. , -0.16666667, -1. , -1. , -0.27058824,\n", - " 0.17829457, 0.36471847, -0.42302068, -0.19074949, -1. ,\n", - " 1. , 0.33655395, -0.49253731, -0.70008077]])" + "array([[-0.23563892, -0.24267292, -0.3286385 , -0.17702227, -0.35287222,\n", + " -0.19248826, -0.3317757 , -1. , -1. , -1. ,\n", + " -1. , -1. , -1. , -1. , -0.11702128,\n", + " -0.24050633, -0.25714286, -0.37378787, -0.22758621, -0.22758621,\n", + " -0.31972789, -0.1862069 , -0.36986301, -0.1862069 , -0.33793103,\n", + " -0.26141079],\n", + " [-0.18171161, -0.0926143 , -0.13380282, -0.02930832, -0.21688159,\n", + " -0.15023474, -0.40420561, -0.99995911, -0.99995779, -0.99995941,\n", + " -0.99995718, -0.99996326, -0.99996042, -0.99997164, -0.19148936,\n", + " -0.36708861, -0.35238095, -0.37370786, -0.1862069 , -0.11724138,\n", + " -0.11564626, -0.06206897, -0.23287671, -0.14482759, -0.40689655,\n", + " -0.17012448],\n", + " [-0.03868699, -0.14185229, -0.0399061 , -0.07151231, -0.34818288,\n", + " -0.20892019, -0.35514019, -0.99991116, -0.99991693, -0.99991555,\n", + " -0.999915 , -0.99993254, -0.99992474, -0.99994125, -0.17021277,\n", + " -0.27848101, -0.3968254 , -0.37362746, -0.04827586, -0.14482759,\n", + " -0.04761905, -0.07586207, -0.35616438, -0.22758621, -0.36551724,\n", + " -0.1659751 ],\n", + " [-0.39038687, -0.27315358, -0.25586854, -0.14654162, -0.24970692,\n", + " -0.16431925, -0.35280374, -0.99988085, -0.99988276, -0.99988086,\n", + " -0.99987538, -0.99989714, -0.99988581, -0.99991086, -0.08510638,\n", + " -0.16455696, -0.33333333, -0.37355606, -0.39310345, -0.28275862,\n", + " -0.23809524, -0.13103448, -0.26027397, -0.15862069, -0.36551724,\n", + " -0.2593361 ],\n", + " [-0.36694021, -0.26846424, -0.25821596, -0.16060961, -0.37866354,\n", + " -0.41784038, -0.49065421, -0.99984912, -0.99984859, -0.99984551,\n", + " -0.99983767, -0.99986841, -0.99985921, -0.99988655, -0.20212766,\n", + " -0.25316456, -0.39047619, -0.37349166, -0.36551724, -0.28275862,\n", + " -0.2244898 , -0.17241379, -0.39726027, -0.42068966, -0.48965517,\n", + " -0.33195021]])" ] }, - "execution_count": 21, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -1527,18 +1456,18 @@ }, { "cell_type": "code", - 
"execution_count": 22, + "execution_count": 27, "id": "4722001e", "metadata": {}, "outputs": [], "source": [ - "step = 5\n", + "step = 6\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 28, "id": "34b5d2ca", "metadata": {}, "outputs": [ @@ -1548,7 +1477,7 @@ "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" ] }, - "execution_count": 23, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -1559,7 +1488,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 29, "id": "011b9c51", "metadata": {}, "outputs": [ @@ -1595,8 +1524,6 @@ " 8\n", " 9\n", " ...\n", - " 14\n", - " 15\n", " 16\n", " 17\n", " 18\n", @@ -1605,160 +1532,162 @@ " 21\n", " 22\n", " 23\n", + " 24\n", + " 25\n", " \n", " \n", " \n", " \n", " 0\n", - " -0.080460\n", - " -0.692308\n", - " -1.0\n", - " -1.0\n", - " -1.0\n", - " -0.225434\n", - " 0.171598\n", - " -0.588235\n", - " -0.600784\n", - " -0.272028\n", + " -0.235639\n", + " -0.242673\n", + " -0.328638\n", + " -0.177022\n", + " -0.352872\n", + " -0.192488\n", + " -0.331776\n", + " -1.000000\n", + " -1.000000\n", + " -1.000000\n", " ...\n", - " -0.594118\n", - " 0.426357\n", - " 0.403772\n", - " -0.136829\n", - " -0.380486\n", - " -1.0\n", - " 1.0\n", - " 0.452496\n", - " -0.492537\n", - " -0.780490\n", + " -0.257143\n", + " -0.373788\n", + " -0.227586\n", + " -0.227586\n", + " -0.319728\n", + " -0.186207\n", + " -0.369863\n", + " -0.186207\n", + " -0.337931\n", + " -0.261411\n", " \n", " \n", " 1\n", - " 0.218391\n", - " -0.538462\n", - " -1.0\n", - " -1.0\n", - " -1.0\n", - " -0.202312\n", - " 0.416174\n", - " -0.441176\n", - " -0.674373\n", - " -0.177376\n", + " -0.181712\n", + " -0.092614\n", + " -0.133803\n", + " -0.029308\n", + " -0.216882\n", + " -0.150235\n", + " -0.404206\n", + " -0.999959\n", + " -0.999958\n", + " -0.999959\n", " ...\n", - " -0.400000\n", - " 0.333333\n", - " 0.416076\n", - " -0.048256\n", - " -0.294733\n", - " -1.0\n", - " 1.0\n", - " 0.256039\n", - " -0.552239\n", - " -0.799515\n", + " -0.352381\n", + " -0.373708\n", + " -0.186207\n", + " -0.117241\n", + " -0.115646\n", + " -0.062069\n", + " -0.232877\n", + " -0.144828\n", + " -0.406897\n", + " -0.170124\n", " \n", " \n", " 2\n", - " -0.494253\n", - " 0.384615\n", - " -1.0\n", - " -1.0\n", - " -1.0\n", - " -0.456647\n", - " 0.471400\n", - " -0.558824\n", - " -0.656414\n", - " -0.285110\n", + " -0.038687\n", + " -0.141852\n", + " -0.039906\n", + " -0.071512\n", + " -0.348183\n", + " -0.208920\n", + " -0.355140\n", + " -0.999911\n", + " -0.999917\n", + " -0.999916\n", " ...\n", - " -0.282353\n", - " 0.255814\n", - " 0.203691\n", - " -0.208272\n", - " -0.258947\n", - " -1.0\n", - " 1.0\n", - " 0.420290\n", - " -0.432836\n", - " -0.719914\n", + " -0.396825\n", + " -0.373627\n", + " -0.048276\n", + " -0.144828\n", + " -0.047619\n", + " -0.075862\n", + " -0.356164\n", + " -0.227586\n", + " -0.365517\n", + " -0.165975\n", " \n", " \n", " 3\n", - " 0.080460\n", - " -0.076923\n", - " -1.0\n", - " -1.0\n", - " -1.0\n", - " -0.618497\n", - " 0.644970\n", - " -0.411765\n", - " -0.650222\n", - " -0.666795\n", + " -0.390387\n", + " -0.273154\n", + " -0.255869\n", + " -0.146542\n", + " -0.249707\n", + " -0.164319\n", + " -0.352804\n", + " -0.999881\n", + " -0.999883\n", + " -0.999881\n", " ...\n", - " -0.282353\n", - " 0.147287\n", - " 0.283135\n", - " -0.425527\n", - " -0.337610\n", - " -1.0\n", - " 1.0\n", - " 0.481481\n", - " -0.343284\n", - " 
-0.750965\n", + " -0.333333\n", + " -0.373556\n", + " -0.393103\n", + " -0.282759\n", + " -0.238095\n", + " -0.131034\n", + " -0.260274\n", + " -0.158621\n", + " -0.365517\n", + " -0.259336\n", " \n", " \n", " 4\n", - " -0.218391\n", - " -0.384615\n", - " -1.0\n", - " -1.0\n", - " -1.0\n", - " -0.445087\n", - " 0.380671\n", - " -0.529412\n", - " -0.650532\n", - " -0.195845\n", + " -0.366940\n", + " -0.268464\n", + " -0.258216\n", + " -0.160610\n", + " -0.378664\n", + " -0.417840\n", + " -0.490654\n", + " -0.999849\n", + " -0.999849\n", + " -0.999846\n", " ...\n", - " -0.270588\n", - " 0.178295\n", - " 0.364718\n", - " -0.423021\n", - " -0.190749\n", - " -1.0\n", - " 1.0\n", - " 0.336554\n", - " -0.492537\n", - " -0.700081\n", + " -0.390476\n", + " -0.373492\n", + " -0.365517\n", + " -0.282759\n", + " -0.224490\n", + " -0.172414\n", + " -0.397260\n", + " -0.420690\n", + " -0.489655\n", + " -0.331950\n", " \n", " \n", "\n", - "

5 rows × 24 columns

\n", + "

5 rows × 26 columns

\n", "" ], "text/plain": [ - " 0 1 2 3 4 5 6 7 8 \\\n", - "0 -0.080460 -0.692308 -1.0 -1.0 -1.0 -0.225434 0.171598 -0.588235 -0.600784 \n", - "1 0.218391 -0.538462 -1.0 -1.0 -1.0 -0.202312 0.416174 -0.441176 -0.674373 \n", - "2 -0.494253 0.384615 -1.0 -1.0 -1.0 -0.456647 0.471400 -0.558824 -0.656414 \n", - "3 0.080460 -0.076923 -1.0 -1.0 -1.0 -0.618497 0.644970 -0.411765 -0.650222 \n", - "4 -0.218391 -0.384615 -1.0 -1.0 -1.0 -0.445087 0.380671 -0.529412 -0.650532 \n", + " 0 1 2 3 4 5 6 \\\n", + "0 -0.235639 -0.242673 -0.328638 -0.177022 -0.352872 -0.192488 -0.331776 \n", + "1 -0.181712 -0.092614 -0.133803 -0.029308 -0.216882 -0.150235 -0.404206 \n", + "2 -0.038687 -0.141852 -0.039906 -0.071512 -0.348183 -0.208920 -0.355140 \n", + "3 -0.390387 -0.273154 -0.255869 -0.146542 -0.249707 -0.164319 -0.352804 \n", + "4 -0.366940 -0.268464 -0.258216 -0.160610 -0.378664 -0.417840 -0.490654 \n", "\n", - " 9 ... 14 15 16 17 18 19 20 \\\n", - "0 -0.272028 ... -0.594118 0.426357 0.403772 -0.136829 -0.380486 -1.0 1.0 \n", - "1 -0.177376 ... -0.400000 0.333333 0.416076 -0.048256 -0.294733 -1.0 1.0 \n", - "2 -0.285110 ... -0.282353 0.255814 0.203691 -0.208272 -0.258947 -1.0 1.0 \n", - "3 -0.666795 ... -0.282353 0.147287 0.283135 -0.425527 -0.337610 -1.0 1.0 \n", - "4 -0.195845 ... -0.270588 0.178295 0.364718 -0.423021 -0.190749 -1.0 1.0 \n", + " 7 8 9 ... 16 17 18 19 \\\n", + "0 -1.000000 -1.000000 -1.000000 ... -0.257143 -0.373788 -0.227586 -0.227586 \n", + "1 -0.999959 -0.999958 -0.999959 ... -0.352381 -0.373708 -0.186207 -0.117241 \n", + "2 -0.999911 -0.999917 -0.999916 ... -0.396825 -0.373627 -0.048276 -0.144828 \n", + "3 -0.999881 -0.999883 -0.999881 ... -0.333333 -0.373556 -0.393103 -0.282759 \n", + "4 -0.999849 -0.999849 -0.999846 ... -0.390476 -0.373492 -0.365517 -0.282759 \n", "\n", - " 21 22 23 \n", - "0 0.452496 -0.492537 -0.780490 \n", - "1 0.256039 -0.552239 -0.799515 \n", - "2 0.420290 -0.432836 -0.719914 \n", - "3 0.481481 -0.343284 -0.750965 \n", - "4 0.336554 -0.492537 -0.700081 \n", + " 20 21 22 23 24 25 \n", + "0 -0.319728 -0.186207 -0.369863 -0.186207 -0.337931 -0.261411 \n", + "1 -0.115646 -0.062069 -0.232877 -0.144828 -0.406897 -0.170124 \n", + "2 -0.047619 -0.075862 -0.356164 -0.227586 -0.365517 -0.165975 \n", + "3 -0.238095 -0.131034 -0.260274 -0.158621 -0.365517 -0.259336 \n", + "4 -0.224490 -0.172414 -0.397260 -0.420690 -0.489655 -0.331950 \n", "\n", - "[5 rows x 24 columns]" + "[5 rows x 26 columns]" ] }, - "execution_count": 24, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -1781,18 +1710,18 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 30, "id": "d58c17c1", "metadata": {}, "outputs": [], "source": [ - "step = 6\n", + "step = 7\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 31, "id": "b5b62c52", "metadata": {}, "outputs": [ @@ -1802,7 +1731,7 @@ "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" ] }, - "execution_count": 26, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -1813,7 +1742,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 32, "id": "8bedb44e", "metadata": {}, "outputs": [ @@ -1849,8 +1778,6 @@ " 8\n", " 9\n", " ...\n", - " 15\n", - " 16\n", " 17\n", " 18\n", " 19\n", @@ -1858,161 +1785,163 @@ " 21\n", " 22\n", " 23\n", + " 24\n", + " 25\n", " turbine_id\n", " \n", " \n", " \n", " \n", " 0\n", - " -0.080460\n", - " -0.692308\n", - " 
-1.0\n", - " -1.0\n", - " -1.0\n", - " -0.225434\n", - " 0.171598\n", - " -0.588235\n", - " -0.600784\n", - " -0.272028\n", + " -0.235639\n", + " -0.242673\n", + " -0.328638\n", + " -0.177022\n", + " -0.352872\n", + " -0.192488\n", + " -0.331776\n", + " -1.000000\n", + " -1.000000\n", + " -1.000000\n", " ...\n", - " 0.426357\n", - " 0.403772\n", - " -0.136829\n", - " -0.380486\n", - " -1.0\n", - " 1.0\n", - " 0.452496\n", - " -0.492537\n", - " -0.780490\n", - " 1\n", + " -0.373788\n", + " -0.227586\n", + " -0.227586\n", + " -0.319728\n", + " -0.186207\n", + " -0.369863\n", + " -0.186207\n", + " -0.337931\n", + " -0.261411\n", + " T001\n", " \n", " \n", " 1\n", - " 0.218391\n", - " -0.538462\n", - " -1.0\n", - " -1.0\n", - " -1.0\n", - " -0.202312\n", - " 0.416174\n", - " -0.441176\n", - " -0.674373\n", - " -0.177376\n", + " -0.181712\n", + " -0.092614\n", + " -0.133803\n", + " -0.029308\n", + " -0.216882\n", + " -0.150235\n", + " -0.404206\n", + " -0.999959\n", + " -0.999958\n", + " -0.999959\n", " ...\n", - " 0.333333\n", - " 0.416076\n", - " -0.048256\n", - " -0.294733\n", - " -1.0\n", - " 1.0\n", - " 0.256039\n", - " -0.552239\n", - " -0.799515\n", - " 1\n", + " -0.373708\n", + " -0.186207\n", + " -0.117241\n", + " -0.115646\n", + " -0.062069\n", + " -0.232877\n", + " -0.144828\n", + " -0.406897\n", + " -0.170124\n", + " T001\n", " \n", " \n", " 2\n", - " -0.494253\n", - " 0.384615\n", - " -1.0\n", - " -1.0\n", - " -1.0\n", - " -0.456647\n", - " 0.471400\n", - " -0.558824\n", - " -0.656414\n", - " -0.285110\n", + " -0.038687\n", + " -0.141852\n", + " -0.039906\n", + " -0.071512\n", + " -0.348183\n", + " -0.208920\n", + " -0.355140\n", + " -0.999911\n", + " -0.999917\n", + " -0.999916\n", " ...\n", - " 0.255814\n", - " 0.203691\n", - " -0.208272\n", - " -0.258947\n", - " -1.0\n", - " 1.0\n", - " 0.420290\n", - " -0.432836\n", - " -0.719914\n", - " 1\n", + " -0.373627\n", + " -0.048276\n", + " -0.144828\n", + " -0.047619\n", + " -0.075862\n", + " -0.356164\n", + " -0.227586\n", + " -0.365517\n", + " -0.165975\n", + " T001\n", " \n", " \n", " 3\n", - " 0.080460\n", - " -0.076923\n", - " -1.0\n", - " -1.0\n", - " -1.0\n", - " -0.618497\n", - " 0.644970\n", - " -0.411765\n", - " -0.650222\n", - " -0.666795\n", + " -0.390387\n", + " -0.273154\n", + " -0.255869\n", + " -0.146542\n", + " -0.249707\n", + " -0.164319\n", + " -0.352804\n", + " -0.999881\n", + " -0.999883\n", + " -0.999881\n", " ...\n", - " 0.147287\n", - " 0.283135\n", - " -0.425527\n", - " -0.337610\n", - " -1.0\n", - " 1.0\n", - " 0.481481\n", - " -0.343284\n", - " -0.750965\n", - " 1\n", + " -0.373556\n", + " -0.393103\n", + " -0.282759\n", + " -0.238095\n", + " -0.131034\n", + " -0.260274\n", + " -0.158621\n", + " -0.365517\n", + " -0.259336\n", + " T001\n", " \n", " \n", " 4\n", - " -0.218391\n", - " -0.384615\n", - " -1.0\n", - " -1.0\n", - " -1.0\n", - " -0.445087\n", - " 0.380671\n", - " -0.529412\n", - " -0.650532\n", - " -0.195845\n", + " -0.366940\n", + " -0.268464\n", + " -0.258216\n", + " -0.160610\n", + " -0.378664\n", + " -0.417840\n", + " -0.490654\n", + " -0.999849\n", + " -0.999849\n", + " -0.999846\n", " ...\n", - " 0.178295\n", - " 0.364718\n", - " -0.423021\n", - " -0.190749\n", - " -1.0\n", - " 1.0\n", - " 0.336554\n", - " -0.492537\n", - " -0.700081\n", - " 1\n", + " -0.373492\n", + " -0.365517\n", + " -0.282759\n", + " -0.224490\n", + " -0.172414\n", + " -0.397260\n", + " -0.420690\n", + " -0.489655\n", + " -0.331950\n", + " T001\n", " \n", " \n", "\n", - "

5 rows × 25 columns

\n", + "

5 rows × 27 columns

\n", "" ], "text/plain": [ - " 0 1 2 3 4 5 6 7 8 \\\n", - "0 -0.080460 -0.692308 -1.0 -1.0 -1.0 -0.225434 0.171598 -0.588235 -0.600784 \n", - "1 0.218391 -0.538462 -1.0 -1.0 -1.0 -0.202312 0.416174 -0.441176 -0.674373 \n", - "2 -0.494253 0.384615 -1.0 -1.0 -1.0 -0.456647 0.471400 -0.558824 -0.656414 \n", - "3 0.080460 -0.076923 -1.0 -1.0 -1.0 -0.618497 0.644970 -0.411765 -0.650222 \n", - "4 -0.218391 -0.384615 -1.0 -1.0 -1.0 -0.445087 0.380671 -0.529412 -0.650532 \n", + " 0 1 2 3 4 5 6 \\\n", + "0 -0.235639 -0.242673 -0.328638 -0.177022 -0.352872 -0.192488 -0.331776 \n", + "1 -0.181712 -0.092614 -0.133803 -0.029308 -0.216882 -0.150235 -0.404206 \n", + "2 -0.038687 -0.141852 -0.039906 -0.071512 -0.348183 -0.208920 -0.355140 \n", + "3 -0.390387 -0.273154 -0.255869 -0.146542 -0.249707 -0.164319 -0.352804 \n", + "4 -0.366940 -0.268464 -0.258216 -0.160610 -0.378664 -0.417840 -0.490654 \n", "\n", - " 9 ... 15 16 17 18 19 20 21 \\\n", - "0 -0.272028 ... 0.426357 0.403772 -0.136829 -0.380486 -1.0 1.0 0.452496 \n", - "1 -0.177376 ... 0.333333 0.416076 -0.048256 -0.294733 -1.0 1.0 0.256039 \n", - "2 -0.285110 ... 0.255814 0.203691 -0.208272 -0.258947 -1.0 1.0 0.420290 \n", - "3 -0.666795 ... 0.147287 0.283135 -0.425527 -0.337610 -1.0 1.0 0.481481 \n", - "4 -0.195845 ... 0.178295 0.364718 -0.423021 -0.190749 -1.0 1.0 0.336554 \n", + " 7 8 9 ... 17 18 19 20 \\\n", + "0 -1.000000 -1.000000 -1.000000 ... -0.373788 -0.227586 -0.227586 -0.319728 \n", + "1 -0.999959 -0.999958 -0.999959 ... -0.373708 -0.186207 -0.117241 -0.115646 \n", + "2 -0.999911 -0.999917 -0.999916 ... -0.373627 -0.048276 -0.144828 -0.047619 \n", + "3 -0.999881 -0.999883 -0.999881 ... -0.373556 -0.393103 -0.282759 -0.238095 \n", + "4 -0.999849 -0.999849 -0.999846 ... -0.373492 -0.365517 -0.282759 -0.224490 \n", "\n", - " 22 23 turbine_id \n", - "0 -0.492537 -0.780490 1 \n", - "1 -0.552239 -0.799515 1 \n", - "2 -0.432836 -0.719914 1 \n", - "3 -0.343284 -0.750965 1 \n", - "4 -0.492537 -0.700081 1 \n", + " 21 22 23 24 25 turbine_id \n", + "0 -0.186207 -0.369863 -0.186207 -0.337931 -0.261411 T001 \n", + "1 -0.062069 -0.232877 -0.144828 -0.406897 -0.170124 T001 \n", + "2 -0.075862 -0.356164 -0.227586 -0.365517 -0.165975 T001 \n", + "3 -0.131034 -0.260274 -0.158621 -0.365517 -0.259336 T001 \n", + "4 -0.172414 -0.397260 -0.420690 -0.489655 -0.331950 T001 \n", "\n", - "[5 rows x 25 columns]" + "[5 rows x 27 columns]" ] }, - "execution_count": 27, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -2035,18 +1964,18 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 33, "id": "9896ef19", "metadata": {}, "outputs": [], "source": [ - "step = 7\n", + "step = 8\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 34, "id": "384e4e91", "metadata": {}, "outputs": [ @@ -2056,7 +1985,7 @@ "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" ] }, - "execution_count": 29, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -2067,7 +1996,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 35, "id": "7dcc2b2c", "metadata": {}, "outputs": [ @@ -2103,14 +2032,14 @@ " 8\n", " 9\n", " ...\n", - " 16\n", - " 17\n", " 18\n", " 19\n", " 20\n", " 21\n", " 22\n", " 23\n", + " 24\n", + " 25\n", " turbine_id\n", " timestamp\n", " \n", @@ -2118,155 +2047,155 @@ " \n", " \n", " 0\n", - " -0.080460\n", - " -0.692308\n", - " -1.0\n", - " -1.0\n", - " -1.0\n", - " 
-0.225434\n", - " 0.171598\n", - " -0.588235\n", - " -0.600784\n", - " -0.272028\n", + " -0.235639\n", + " -0.242673\n", + " -0.328638\n", + " -0.177022\n", + " -0.352872\n", + " -0.192488\n", + " -0.331776\n", + " -1.000000\n", + " -1.000000\n", + " -1.000000\n", " ...\n", - " 0.403772\n", - " -0.136829\n", - " -0.380486\n", - " -1.0\n", - " 1.0\n", - " 0.452496\n", - " -0.492537\n", - " -0.780490\n", - " 1\n", - " 2013-01-12 00:10:00\n", + " -0.227586\n", + " -0.227586\n", + " -0.319728\n", + " -0.186207\n", + " -0.369863\n", + " -0.186207\n", + " -0.337931\n", + " -0.261411\n", + " T001\n", + " 2013-01-10 00:00:00\n", " \n", " \n", " 1\n", - " 0.218391\n", - " -0.538462\n", - " -1.0\n", - " -1.0\n", - " -1.0\n", - " -0.202312\n", - " 0.416174\n", - " -0.441176\n", - " -0.674373\n", - " -0.177376\n", + " -0.181712\n", + " -0.092614\n", + " -0.133803\n", + " -0.029308\n", + " -0.216882\n", + " -0.150235\n", + " -0.404206\n", + " -0.999959\n", + " -0.999958\n", + " -0.999959\n", " ...\n", - " 0.416076\n", - " -0.048256\n", - " -0.294733\n", - " -1.0\n", - " 1.0\n", - " 0.256039\n", - " -0.552239\n", - " -0.799515\n", - " 1\n", - " 2013-01-12 00:20:00\n", + " -0.186207\n", + " -0.117241\n", + " -0.115646\n", + " -0.062069\n", + " -0.232877\n", + " -0.144828\n", + " -0.406897\n", + " -0.170124\n", + " T001\n", + " 2013-01-10 00:10:00\n", " \n", " \n", " 2\n", - " -0.494253\n", - " 0.384615\n", - " -1.0\n", - " -1.0\n", - " -1.0\n", - " -0.456647\n", - " 0.471400\n", - " -0.558824\n", - " -0.656414\n", - " -0.285110\n", + " -0.038687\n", + " -0.141852\n", + " -0.039906\n", + " -0.071512\n", + " -0.348183\n", + " -0.208920\n", + " -0.355140\n", + " -0.999911\n", + " -0.999917\n", + " -0.999916\n", " ...\n", - " 0.203691\n", - " -0.208272\n", - " -0.258947\n", - " -1.0\n", - " 1.0\n", - " 0.420290\n", - " -0.432836\n", - " -0.719914\n", - " 1\n", - " 2013-01-12 00:30:00\n", + " -0.048276\n", + " -0.144828\n", + " -0.047619\n", + " -0.075862\n", + " -0.356164\n", + " -0.227586\n", + " -0.365517\n", + " -0.165975\n", + " T001\n", + " 2013-01-10 00:20:00\n", " \n", " \n", " 3\n", - " 0.080460\n", - " -0.076923\n", - " -1.0\n", - " -1.0\n", - " -1.0\n", - " -0.618497\n", - " 0.644970\n", - " -0.411765\n", - " -0.650222\n", - " -0.666795\n", + " -0.390387\n", + " -0.273154\n", + " -0.255869\n", + " -0.146542\n", + " -0.249707\n", + " -0.164319\n", + " -0.352804\n", + " -0.999881\n", + " -0.999883\n", + " -0.999881\n", " ...\n", - " 0.283135\n", - " -0.425527\n", - " -0.337610\n", - " -1.0\n", - " 1.0\n", - " 0.481481\n", - " -0.343284\n", - " -0.750965\n", - " 1\n", - " 2013-01-12 00:40:00\n", + " -0.393103\n", + " -0.282759\n", + " -0.238095\n", + " -0.131034\n", + " -0.260274\n", + " -0.158621\n", + " -0.365517\n", + " -0.259336\n", + " T001\n", + " 2013-01-10 00:30:00\n", " \n", " \n", " 4\n", - " -0.218391\n", - " -0.384615\n", - " -1.0\n", - " -1.0\n", - " -1.0\n", - " -0.445087\n", - " 0.380671\n", - " -0.529412\n", - " -0.650532\n", - " -0.195845\n", + " -0.366940\n", + " -0.268464\n", + " -0.258216\n", + " -0.160610\n", + " -0.378664\n", + " -0.417840\n", + " -0.490654\n", + " -0.999849\n", + " -0.999849\n", + " -0.999846\n", " ...\n", - " 0.364718\n", - " -0.423021\n", - " -0.190749\n", - " -1.0\n", - " 1.0\n", - " 0.336554\n", - " -0.492537\n", - " -0.700081\n", - " 1\n", - " 2013-01-12 00:50:00\n", + " -0.365517\n", + " -0.282759\n", + " -0.224490\n", + " -0.172414\n", + " -0.397260\n", + " -0.420690\n", + " -0.489655\n", + " -0.331950\n", + " T001\n", + " 2013-01-10 00:40:00\n", " \n", " 
\n", "\n", - "

5 rows × 26 columns

\n", + "

5 rows × 28 columns

\n", "" ], "text/plain": [ - " 0 1 2 3 4 5 6 7 8 \\\n", - "0 -0.080460 -0.692308 -1.0 -1.0 -1.0 -0.225434 0.171598 -0.588235 -0.600784 \n", - "1 0.218391 -0.538462 -1.0 -1.0 -1.0 -0.202312 0.416174 -0.441176 -0.674373 \n", - "2 -0.494253 0.384615 -1.0 -1.0 -1.0 -0.456647 0.471400 -0.558824 -0.656414 \n", - "3 0.080460 -0.076923 -1.0 -1.0 -1.0 -0.618497 0.644970 -0.411765 -0.650222 \n", - "4 -0.218391 -0.384615 -1.0 -1.0 -1.0 -0.445087 0.380671 -0.529412 -0.650532 \n", + " 0 1 2 3 4 5 6 \\\n", + "0 -0.235639 -0.242673 -0.328638 -0.177022 -0.352872 -0.192488 -0.331776 \n", + "1 -0.181712 -0.092614 -0.133803 -0.029308 -0.216882 -0.150235 -0.404206 \n", + "2 -0.038687 -0.141852 -0.039906 -0.071512 -0.348183 -0.208920 -0.355140 \n", + "3 -0.390387 -0.273154 -0.255869 -0.146542 -0.249707 -0.164319 -0.352804 \n", + "4 -0.366940 -0.268464 -0.258216 -0.160610 -0.378664 -0.417840 -0.490654 \n", "\n", - " 9 ... 16 17 18 19 20 21 22 \\\n", - "0 -0.272028 ... 0.403772 -0.136829 -0.380486 -1.0 1.0 0.452496 -0.492537 \n", - "1 -0.177376 ... 0.416076 -0.048256 -0.294733 -1.0 1.0 0.256039 -0.552239 \n", - "2 -0.285110 ... 0.203691 -0.208272 -0.258947 -1.0 1.0 0.420290 -0.432836 \n", - "3 -0.666795 ... 0.283135 -0.425527 -0.337610 -1.0 1.0 0.481481 -0.343284 \n", - "4 -0.195845 ... 0.364718 -0.423021 -0.190749 -1.0 1.0 0.336554 -0.492537 \n", + " 7 8 9 ... 18 19 20 21 \\\n", + "0 -1.000000 -1.000000 -1.000000 ... -0.227586 -0.227586 -0.319728 -0.186207 \n", + "1 -0.999959 -0.999958 -0.999959 ... -0.186207 -0.117241 -0.115646 -0.062069 \n", + "2 -0.999911 -0.999917 -0.999916 ... -0.048276 -0.144828 -0.047619 -0.075862 \n", + "3 -0.999881 -0.999883 -0.999881 ... -0.393103 -0.282759 -0.238095 -0.131034 \n", + "4 -0.999849 -0.999849 -0.999846 ... -0.365517 -0.282759 -0.224490 -0.172414 \n", "\n", - " 23 turbine_id timestamp \n", - "0 -0.780490 1 2013-01-12 00:10:00 \n", - "1 -0.799515 1 2013-01-12 00:20:00 \n", - "2 -0.719914 1 2013-01-12 00:30:00 \n", - "3 -0.750965 1 2013-01-12 00:40:00 \n", - "4 -0.700081 1 2013-01-12 00:50:00 \n", + " 22 23 24 25 turbine_id timestamp \n", + "0 -0.369863 -0.186207 -0.337931 -0.261411 T001 2013-01-10 00:00:00 \n", + "1 -0.232877 -0.144828 -0.406897 -0.170124 T001 2013-01-10 00:10:00 \n", + "2 -0.356164 -0.227586 -0.365517 -0.165975 T001 2013-01-10 00:20:00 \n", + "3 -0.260274 -0.158621 -0.365517 -0.259336 T001 2013-01-10 00:30:00 \n", + "4 -0.397260 -0.420690 -0.489655 -0.331950 T001 2013-01-10 00:40:00 \n", "\n", - "[5 rows x 26 columns]" + "[5 rows x 28 columns]" ] }, - "execution_count": 30, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -2290,7 +2219,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 36, "id": "b4ff2d0a", "metadata": {}, "outputs": [ @@ -2300,30 +2229,30 @@ "{'window_size': 24, 'cutoff_time': 'cutoff_time', 'time_index': 'timestamp'}" ] }, - "execution_count": 31, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pipeline._pipeline.get_hyperparameters()[\n", - " 'mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1']" + " 'mlstars.custom.timeseries_preprocessing.cutoff_window_sequences#1']" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 37, "id": "2c8fd174", "metadata": {}, "outputs": [], "source": [ - "step = 8\n", + "step = 9\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 38, "id": "b051da01", "metadata": {}, "outputs": 
[ @@ -2333,7 +2262,7 @@ "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" ] }, - "execution_count": 33, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -2344,17 +2273,17 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 39, "id": "a802d22b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(33727, 26)" + "(51121, 28)" ] }, - "execution_count": 34, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } @@ -2365,17 +2294,17 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 40, "id": "cc53012b", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(18131,)" + "(353,)" ] }, - "execution_count": 35, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } @@ -2386,17 +2315,17 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 41, "id": "b1212aaf", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(18131, 24, 24)" + "(353, 24, 26)" ] }, - "execution_count": 36, + "execution_count": 41, "metadata": {}, "output_type": "execute_result" } @@ -2407,31 +2336,34 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 42, "id": "87abb56d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([[ 0.2183908 , -0.53846154, -1. , -1. , -1. ,\n", - " -0.20231214, 0.41617357, -0.44117647, -0.674373 , -0.17737591,\n", - " -1. , -0.33333333, -1. , -1. , -0.4 ,\n", - " 0.33333333, 0.41607597, -0.04825569, -0.29473329, -1. ,\n", - " 1. , 0.25603865, -0.55223881, -0.79951539],\n", - " [-0.49425287, 0.38461538, -1. , -1. , -1. ,\n", - " -0.4566474 , 0.47140039, -0.55882353, -0.65641449, -0.28510966,\n", - " -1. , -0.66666667, -1. , -1. , -0.28235294,\n", - " 0.25581395, 0.20369132, -0.2082724 , -0.25894666, -1. ,\n", - " 1. , 0.42028986, -0.43283582, -0.71991385],\n", - " [ 0.08045977, -0.07692308, -1. , -1. , -1. ,\n", - " -0.61849711, 0.64497041, -0.41176471, -0.6502219 , -0.66679492,\n", - " -1. , -0.33333333, -1. , -1. , -0.28235294,\n", - " 0.14728682, 0.28313495, -0.42552747, -0.33760972, -1. ,\n", - " 1. 
, 0.48148148, -0.34328358, -0.75096473]])" + "array([[-0.66002345, -0.57327081, -0.64084507, -0.57796014, -0.6014068 ,\n", + " -0.56103286, -0.55140187, -0.9928135 , -0.99291267, -0.99315058,\n", + " -0.99304288, -0.99346346, -0.99352632, -0.99395333, -0.42553191,\n", + " -0.41772152, -0.58730159, -0.35996294, -0.66896552, -0.57241379,\n", + " -0.61904762, -0.5862069 , -0.60273973, -0.55862069, -0.55862069,\n", + " -0.59751037],\n", + " [-0.2989449 , -0.38569754, -0.48591549, -0.47713951, -0.66705744,\n", + " -0.5915493 , -0.77336449, -0.99278389, -0.9928852 , -0.99312701,\n", + " -0.99301988, -0.9934481 , -0.9935075 , -0.9939459 , -0.39361702,\n", + " -0.40506329, -0.54285714, -0.35992014, -0.40689655, -0.42068966,\n", + " -0.46938776, -0.48965517, -0.67123288, -0.5862069 , -0.83448276,\n", + " -0.5560166 ],\n", + " [-0.33645955, -0.40679953, -0.39906103, -0.38569754, -0.56154748,\n", + " -0.43192488, -0.45560748, -0.99275498, -0.9928584 , -0.99310017,\n", + " -0.99299431, -0.99342739, -0.99348349, -0.99392294, -0.29787234,\n", + " -0.3164557 , -0.49206349, -0.35986854, -0.42068966, -0.43448276,\n", + " -0.40136054, -0.43448276, -0.56164384, -0.47586207, -0.51724138,\n", + " -0.46473029]])" ] }, - "execution_count": 37, + "execution_count": 42, "metadata": {}, "output_type": "execute_result" } @@ -2454,7 +2386,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 43, "id": "561c3e09", "metadata": {}, "outputs": [ @@ -2462,15 +2394,49 @@ "name": "stderr", "output_type": "stream", "text": [ - "2022-02-01 10:08:21.044547: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2022-02-01 10:08:21.080727: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7f8579596430 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n", - "2022-02-01 10:08:21.080742: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version\n" + "2023-04-13 18:20:05.852611: I tensorflow/core/platform/cpu_feature_guard.cc:143] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA\n", + "2023-04-13 18:20:05.887442: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fc91ac474f0 initialized for platform Host (this does not guarantee that XLA will be used). 
Devices:\n", + "2023-04-13 18:20:05.887460: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 1/35\n", + "5/5 [==============================] - 1s 123ms/step - loss: 0.2339 - mse: 0.2339 - val_loss: 0.1641 - val_mse: 0.1641\n", + "Epoch 2/35\n", + "5/5 [==============================] - 0s 22ms/step - loss: 0.1780 - mse: 0.1780 - val_loss: 0.1505 - val_mse: 0.1505\n", + "Epoch 3/35\n", + "5/5 [==============================] - 0s 21ms/step - loss: 0.1540 - mse: 0.1540 - val_loss: 0.1559 - val_mse: 0.1559\n", + "Epoch 4/35\n", + "5/5 [==============================] - 0s 21ms/step - loss: 0.1532 - mse: 0.1532 - val_loss: 0.1446 - val_mse: 0.1446\n", + "Epoch 5/35\n", + "5/5 [==============================] - 0s 21ms/step - loss: 0.1438 - mse: 0.1438 - val_loss: 0.1511 - val_mse: 0.1511\n", + "Epoch 6/35\n", + "5/5 [==============================] - 0s 24ms/step - loss: 0.1449 - mse: 0.1449 - val_loss: 0.1470 - val_mse: 0.1470\n", + "Epoch 7/35\n", + "5/5 [==============================] - 0s 21ms/step - loss: 0.1437 - mse: 0.1437 - val_loss: 0.1537 - val_mse: 0.1537\n", + "Epoch 8/35\n", + "5/5 [==============================] - 0s 25ms/step - loss: 0.1529 - mse: 0.1529 - val_loss: 0.1910 - val_mse: 0.1910\n", + "Epoch 9/35\n", + "5/5 [==============================] - 0s 25ms/step - loss: 0.1406 - mse: 0.1406 - val_loss: 0.1551 - val_mse: 0.1551\n", + "Epoch 10/35\n", + "5/5 [==============================] - 0s 22ms/step - loss: 0.1360 - mse: 0.1360 - val_loss: 0.1745 - val_mse: 0.1745\n", + "Epoch 11/35\n", + "5/5 [==============================] - 0s 22ms/step - loss: 0.1314 - mse: 0.1314 - val_loss: 0.1848 - val_mse: 0.1848\n", + "Epoch 12/35\n", + "5/5 [==============================] - 0s 21ms/step - loss: 0.1306 - mse: 0.1306 - val_loss: 0.1734 - val_mse: 0.1734\n", + "Epoch 13/35\n", + "5/5 [==============================] - 0s 21ms/step - loss: 0.1258 - mse: 0.1258 - val_loss: 0.1816 - val_mse: 0.1816\n", + "Epoch 14/35\n", + "5/5 [==============================] - 0s 21ms/step - loss: 0.1230 - mse: 0.1230 - val_loss: 0.1820 - val_mse: 0.1820\n", + "6/6 [==============================] - 0s 5ms/step\n" ] } ], "source": [ - "step = 9\n", + "step = 10\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] } @@ -2491,7 +2457,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.11" + "version": "3.8.16" } }, "nbformat": 4, diff --git a/tutorials/pipelines/lstm_with_unstack.ipynb b/tutorials/pipelines/lstm_with_unstack.ipynb index 799b90e..3793f21 100644 --- a/tutorials/pipelines/lstm_with_unstack.ipynb +++ b/tutorials/pipelines/lstm_with_unstack.ipynb @@ -46,7 +46,8 @@ { "data": { "text/plain": [ - "['mlblocks.MLPipeline',\n", + "['pandas.DataFrame.resample',\n", + " 'pandas.DataFrame.unstack',\n", " 'pandas.DataFrame.pop',\n", " 'pandas.DataFrame.pop',\n", " 'sklearn.impute.SimpleImputer',\n", @@ -54,7 +55,7 @@ " 'pandas.DataFrame',\n", " 'pandas.DataFrame.set',\n", " 'pandas.DataFrame.set',\n", - " 'mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences',\n", + " 'mlstars.custom.timeseries_preprocessing.cutoff_window_sequences',\n", " 'keras.Sequential.LSTMTimeSeriesClassifier']" ] }, @@ -270,20 +271,12 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## mlblocks.MLPipeline\n", - "\n", - "### pandas.DataFrame.resample\n", + "## pandas.DataFrame.resample\n", "\n", 
"* Input: readings\n", "* Output: readings (resampled)\n", "* Effect: readings have been resampled to the indicated resample rule and turbine_id,\n", - " signal_id and timestamp have been set as a multi-index\n", - " \n", - "### pandas.DataFrame.unstack\n", - "\n", - "* Input: readings (resampled)\n", - "* Output: readings (unstacked)\n", - "* Effect: readings have been unstacked" + " signal_id and timestamp have been set as a multi-index" ] }, { @@ -319,6 +312,130 @@ "cell_type": "code", "execution_count": 9, "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
value
turbine_idsignal_idtimestamp
T001S012013-01-10 00:00:00313.333333
2013-01-10 01:00:00197.500000
2013-01-10 02:00:00248.166667
2013-01-10 03:00:00253.166667
2013-01-10 04:00:00305.000000
\n", + "
" + ], + "text/plain": [ + " value\n", + "turbine_id signal_id timestamp \n", + "T001 S01 2013-01-10 00:00:00 313.333333\n", + " 2013-01-10 01:00:00 197.500000\n", + " 2013-01-10 02:00:00 248.166667\n", + " 2013-01-10 03:00:00 253.166667\n", + " 2013-01-10 04:00:00 305.000000" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context['readings'].head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## pandas.DataFrame.unstack\n", + "\n", + "* Input: readings (resampled)\n", + "* Output: readings (unstacked)\n", + "* Effect: readings have been unstacked" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "step = 1\n", + "context = pipeline.fit(**context, output_=step, start_=step)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "dict_keys(['readings', 'turbines', 'X', 'y'])" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "context.keys()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, "outputs": [ { "data": { @@ -369,121 +486,121 @@ " 0\n", " T001\n", " 2013-01-10 00:00:00\n", - " 323.0\n", - " 320.0\n", - " 284.0\n", - " 348.0\n", - " 273.0\n", - " 342.0\n", - " 280.0\n", - " 3197842.0\n", + " 313.333333\n", + " 323.833333\n", + " 336.000000\n", + " 364.666667\n", + " 286.500000\n", + " 314.000000\n", + " 243.166667\n", + " 3.197980e+06\n", " ...\n", - " 11.7\n", - " 3131020.0\n", - " 55.0\n", - " 55.0\n", - " 47.0\n", - " 58.0\n", - " 45.0\n", - " 58.0\n", - " 47.0\n", - " 356.0\n", + " 10.383333\n", + " 3.131958e+06\n", + " 52.666667\n", + " 54.333333\n", + " 56.166667\n", + " 61.000000\n", + " 47.666667\n", + " 52.666667\n", + " 40.833333\n", + " 357.333333\n", " \n", " \n", " 1\n", " T001\n", - " 2013-01-10 00:10:00\n", - " 346.0\n", - " 384.0\n", - " 367.0\n", - " 411.0\n", - " 331.0\n", - " 360.0\n", - " 249.0\n", - " 3197900.0\n", + " 2013-01-10 01:00:00\n", + " 197.500000\n", + " 221.333333\n", + " 216.000000\n", + " 260.666667\n", + " 206.833333\n", + " 235.833333\n", + " 186.666667\n", + " 3.198221e+06\n", " ...\n", - " 10.2\n", - " 3131420.0\n", - " 58.0\n", - " 63.0\n", - " 62.0\n", - " 67.0\n", - " 55.0\n", - " 61.0\n", - " 42.0\n", - " 400.0\n", + " 8.666667\n", + " 3.133668e+06\n", + " 33.166667\n", + " 37.000000\n", + " 36.166667\n", + " 43.666667\n", + " 34.500000\n", + " 39.333333\n", + " 31.166667\n", + " 249.666667\n", " \n", " \n", " 2\n", " T001\n", - " 2013-01-10 00:20:00\n", - " 407.0\n", - " 363.0\n", - " 407.0\n", - " 393.0\n", - " 275.0\n", - " 335.0\n", - " 270.0\n", - " 3197968.0\n", + " 2013-01-10 02:00:00\n", + " 248.166667\n", + " 271.666667\n", + " 277.500000\n", + " 298.000000\n", + " 233.666667\n", + " 271.166667\n", + " 216.333333\n", + " 3.198448e+06\n", " ...\n", - " 9.5\n", - " 3131822.0\n", - " 68.0\n", - " 61.0\n", - " 67.0\n", - " 66.0\n", - " 46.0\n", - " 55.0\n", - " 45.0\n", - " 402.0\n", + " 8.833333\n", + " 3.135413e+06\n", + " 41.500000\n", + " 45.666667\n", + " 46.500000\n", + " 49.666667\n", + " 39.333333\n", + " 45.500000\n", + " 36.166667\n", + " 297.666667\n", " \n", " \n", " 3\n", " T001\n", - " 2013-01-10 00:30:00\n", - " 257.0\n", - " 307.0\n", - " 315.0\n", - " 361.0\n", - " 317.0\n", - " 354.0\n", - " 271.0\n", - " 3198011.0\n", + " 2013-01-10 03:00:00\n", + " 253.166667\n", + " 256.166667\n", + " 
242.666667\n", + " 265.333333\n", + " 211.666667\n", + " 226.666667\n", + " 181.000000\n", + " 3.198691e+06\n", " ...\n", - " 10.5\n", - " 3132179.0\n", - " 43.0\n", - " 51.0\n", - " 53.0\n", - " 62.0\n", - " 53.0\n", - " 60.0\n", - " 45.0\n", - " 357.0\n", + " 8.433333\n", + " 3.137001e+06\n", + " 42.333333\n", + " 42.833333\n", + " 40.500000\n", + " 44.166667\n", + " 35.333333\n", + " 37.833333\n", + " 30.333333\n", + " 268.000000\n", " \n", " \n", " 4\n", " T001\n", - " 2013-01-10 00:40:00\n", - " 267.0\n", - " 309.0\n", - " 314.0\n", - " 355.0\n", - " 262.0\n", - " 246.0\n", - " 212.0\n", - " 3198056.0\n", + " 2013-01-10 04:00:00\n", + " 305.000000\n", + " 312.333333\n", + " 346.166667\n", + " 329.833333\n", + " 280.666667\n", + " 308.833333\n", + " 271.833333\n", + " 3.198978e+06\n", " ...\n", - " 9.6\n", - " 3132501.0\n", - " 45.0\n", - " 51.0\n", - " 54.0\n", - " 59.0\n", - " 43.0\n", - " 41.0\n", - " 36.0\n", - " 322.0\n", + " 9.083333\n", + " 3.138843e+06\n", + " 50.500000\n", + " 51.166667\n", + " 55.500000\n", + " 53.666667\n", + " 46.166667\n", + " 49.666667\n", + " 41.166667\n", + " 341.833333\n", " \n", " \n", "\n", @@ -491,38 +608,38 @@ "" ], "text/plain": [ - " turbine_id timestamp value_S01 value_S02 value_S03 value_S04 \\\n", - "0 T001 2013-01-10 00:00:00 323.0 320.0 284.0 348.0 \n", - "1 T001 2013-01-10 00:10:00 346.0 384.0 367.0 411.0 \n", - "2 T001 2013-01-10 00:20:00 407.0 363.0 407.0 393.0 \n", - "3 T001 2013-01-10 00:30:00 257.0 307.0 315.0 361.0 \n", - "4 T001 2013-01-10 00:40:00 267.0 309.0 314.0 355.0 \n", + " turbine_id timestamp value_S01 value_S02 value_S03 \\\n", + "0 T001 2013-01-10 00:00:00 313.333333 323.833333 336.000000 \n", + "1 T001 2013-01-10 01:00:00 197.500000 221.333333 216.000000 \n", + "2 T001 2013-01-10 02:00:00 248.166667 271.666667 277.500000 \n", + "3 T001 2013-01-10 03:00:00 253.166667 256.166667 242.666667 \n", + "4 T001 2013-01-10 04:00:00 305.000000 312.333333 346.166667 \n", "\n", - " value_S05 value_S06 value_S07 value_S08 ... value_S17 value_S18 \\\n", - "0 273.0 342.0 280.0 3197842.0 ... 11.7 3131020.0 \n", - "1 331.0 360.0 249.0 3197900.0 ... 10.2 3131420.0 \n", - "2 275.0 335.0 270.0 3197968.0 ... 9.5 3131822.0 \n", - "3 317.0 354.0 271.0 3198011.0 ... 10.5 3132179.0 \n", - "4 262.0 246.0 212.0 3198056.0 ... 9.6 3132501.0 \n", + " value_S04 value_S05 value_S06 value_S07 value_S08 ... \\\n", + "0 364.666667 286.500000 314.000000 243.166667 3.197980e+06 ... \n", + "1 260.666667 206.833333 235.833333 186.666667 3.198221e+06 ... \n", + "2 298.000000 233.666667 271.166667 216.333333 3.198448e+06 ... \n", + "3 265.333333 211.666667 226.666667 181.000000 3.198691e+06 ... \n", + "4 329.833333 280.666667 308.833333 271.833333 3.198978e+06 ... 
\n", "\n", - " value_S19 value_S20 value_S21 value_S22 value_S23 value_S24 \\\n", - "0 55.0 55.0 47.0 58.0 45.0 58.0 \n", - "1 58.0 63.0 62.0 67.0 55.0 61.0 \n", - "2 68.0 61.0 67.0 66.0 46.0 55.0 \n", - "3 43.0 51.0 53.0 62.0 53.0 60.0 \n", - "4 45.0 51.0 54.0 59.0 43.0 41.0 \n", + " value_S17 value_S18 value_S19 value_S20 value_S21 value_S22 \\\n", + "0 10.383333 3.131958e+06 52.666667 54.333333 56.166667 61.000000 \n", + "1 8.666667 3.133668e+06 33.166667 37.000000 36.166667 43.666667 \n", + "2 8.833333 3.135413e+06 41.500000 45.666667 46.500000 49.666667 \n", + "3 8.433333 3.137001e+06 42.333333 42.833333 40.500000 44.166667 \n", + "4 9.083333 3.138843e+06 50.500000 51.166667 55.500000 53.666667 \n", "\n", - " value_S25 value_S26 \n", - "0 47.0 356.0 \n", - "1 42.0 400.0 \n", - "2 45.0 402.0 \n", - "3 45.0 357.0 \n", - "4 36.0 322.0 \n", + " value_S23 value_S24 value_S25 value_S26 \n", + "0 47.666667 52.666667 40.833333 357.333333 \n", + "1 34.500000 39.333333 31.166667 249.666667 \n", + "2 39.333333 45.500000 36.166667 297.666667 \n", + "3 35.333333 37.833333 30.333333 268.000000 \n", + "4 46.166667 49.666667 41.166667 341.833333 \n", "\n", "[5 rows x 28 columns]" ] }, - "execution_count": 9, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -544,17 +661,17 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ - "step = 1\n", + "step = 2\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 14, "metadata": {}, "outputs": [ { @@ -563,7 +680,7 @@ "dict_keys(['readings', 'turbines', 'X', 'y', 'turbine_id'])" ] }, - "execution_count": 11, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } @@ -574,7 +691,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 15, "metadata": {}, "outputs": [ { @@ -588,7 +705,7 @@ "Name: turbine_id, dtype: object" ] }, - "execution_count": 12, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -599,7 +716,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -650,122 +767,122 @@ " \n", " 0\n", " 2013-01-10 00:00:00\n", - " 323.0\n", - " 320.0\n", - " 284.0\n", - " 348.0\n", - " 273.0\n", - " 342.0\n", - " 280.0\n", - " 3197842.0\n", - " 695000.0\n", + " 313.333333\n", + " 323.833333\n", + " 336.000000\n", + " 364.666667\n", + " 286.500000\n", + " 314.000000\n", + " 243.166667\n", + " 3.197980e+06\n", + " 695143.166667\n", " ...\n", - " 11.7\n", - " 3131020.0\n", - " 55.0\n", - " 55.0\n", - " 47.0\n", - " 58.0\n", - " 45.0\n", - " 58.0\n", - " 47.0\n", - " 356.0\n", + " 10.383333\n", + " 3.131958e+06\n", + " 52.666667\n", + " 54.333333\n", + " 56.166667\n", + " 61.000000\n", + " 47.666667\n", + " 52.666667\n", + " 40.833333\n", + " 357.333333\n", " \n", " \n", " 1\n", - " 2013-01-10 00:10:00\n", - " 346.0\n", - " 384.0\n", - " 367.0\n", - " 411.0\n", - " 331.0\n", - " 360.0\n", - " 249.0\n", - " 3197900.0\n", - " 695063.0\n", + " 2013-01-10 01:00:00\n", + " 197.500000\n", + " 221.333333\n", + " 216.000000\n", + " 260.666667\n", + " 206.833333\n", + " 235.833333\n", + " 186.666667\n", + " 3.198221e+06\n", + " 695403.666667\n", " ...\n", - " 10.2\n", - " 3131420.0\n", - " 58.0\n", - " 63.0\n", - " 62.0\n", - " 67.0\n", - " 55.0\n", - " 61.0\n", - " 42.0\n", - " 400.0\n", + " 8.666667\n", + " 3.133668e+06\n", + " 33.166667\n", + " 37.000000\n", + " 
36.166667\n", + " 43.666667\n", + " 34.500000\n", + " 39.333333\n", + " 31.166667\n", + " 249.666667\n", " \n", " \n", " 2\n", - " 2013-01-10 00:20:00\n", - " 407.0\n", - " 363.0\n", - " 407.0\n", - " 393.0\n", - " 275.0\n", - " 335.0\n", - " 270.0\n", - " 3197968.0\n", - " 695124.0\n", + " 2013-01-10 02:00:00\n", + " 248.166667\n", + " 271.666667\n", + " 277.500000\n", + " 298.000000\n", + " 233.666667\n", + " 271.166667\n", + " 216.333333\n", + " 3.198448e+06\n", + " 695656.500000\n", " ...\n", - " 9.5\n", - " 3131822.0\n", - " 68.0\n", - " 61.0\n", - " 67.0\n", - " 66.0\n", - " 46.0\n", - " 55.0\n", - " 45.0\n", - " 402.0\n", + " 8.833333\n", + " 3.135413e+06\n", + " 41.500000\n", + " 45.666667\n", + " 46.500000\n", + " 49.666667\n", + " 39.333333\n", + " 45.500000\n", + " 36.166667\n", + " 297.666667\n", " \n", " \n", " 3\n", - " 2013-01-10 00:30:00\n", - " 257.0\n", - " 307.0\n", - " 315.0\n", - " 361.0\n", - " 317.0\n", - " 354.0\n", - " 271.0\n", - " 3198011.0\n", - " 695175.0\n", + " 2013-01-10 03:00:00\n", + " 253.166667\n", + " 256.166667\n", + " 242.666667\n", + " 265.333333\n", + " 211.666667\n", + " 226.666667\n", + " 181.000000\n", + " 3.198691e+06\n", + " 695911.333333\n", " ...\n", - " 10.5\n", - " 3132179.0\n", - " 43.0\n", - " 51.0\n", - " 53.0\n", - " 62.0\n", - " 53.0\n", - " 60.0\n", - " 45.0\n", - " 357.0\n", + " 8.433333\n", + " 3.137001e+06\n", + " 42.333333\n", + " 42.833333\n", + " 40.500000\n", + " 44.166667\n", + " 35.333333\n", + " 37.833333\n", + " 30.333333\n", + " 268.000000\n", " \n", " \n", " 4\n", - " 2013-01-10 00:40:00\n", - " 267.0\n", - " 309.0\n", - " 314.0\n", - " 355.0\n", - " 262.0\n", - " 246.0\n", - " 212.0\n", - " 3198056.0\n", - " 695226.0\n", + " 2013-01-10 04:00:00\n", + " 305.000000\n", + " 312.333333\n", + " 346.166667\n", + " 329.833333\n", + " 280.666667\n", + " 308.833333\n", + " 271.833333\n", + " 3.198978e+06\n", + " 696195.833333\n", " ...\n", - " 9.6\n", - " 3132501.0\n", - " 45.0\n", - " 51.0\n", - " 54.0\n", - " 59.0\n", - " 43.0\n", - " 41.0\n", - " 36.0\n", - " 322.0\n", + " 9.083333\n", + " 3.138843e+06\n", + " 50.500000\n", + " 51.166667\n", + " 55.500000\n", + " 53.666667\n", + " 46.166667\n", + " 49.666667\n", + " 41.166667\n", + " 341.833333\n", " \n", " \n", "\n", @@ -773,38 +890,38 @@ "" ], "text/plain": [ - " timestamp value_S01 value_S02 value_S03 value_S04 value_S05 \\\n", - "0 2013-01-10 00:00:00 323.0 320.0 284.0 348.0 273.0 \n", - "1 2013-01-10 00:10:00 346.0 384.0 367.0 411.0 331.0 \n", - "2 2013-01-10 00:20:00 407.0 363.0 407.0 393.0 275.0 \n", - "3 2013-01-10 00:30:00 257.0 307.0 315.0 361.0 317.0 \n", - "4 2013-01-10 00:40:00 267.0 309.0 314.0 355.0 262.0 \n", + " timestamp value_S01 value_S02 value_S03 value_S04 \\\n", + "0 2013-01-10 00:00:00 313.333333 323.833333 336.000000 364.666667 \n", + "1 2013-01-10 01:00:00 197.500000 221.333333 216.000000 260.666667 \n", + "2 2013-01-10 02:00:00 248.166667 271.666667 277.500000 298.000000 \n", + "3 2013-01-10 03:00:00 253.166667 256.166667 242.666667 265.333333 \n", + "4 2013-01-10 04:00:00 305.000000 312.333333 346.166667 329.833333 \n", "\n", - " value_S06 value_S07 value_S08 value_S09 ... value_S17 value_S18 \\\n", - "0 342.0 280.0 3197842.0 695000.0 ... 11.7 3131020.0 \n", - "1 360.0 249.0 3197900.0 695063.0 ... 10.2 3131420.0 \n", - "2 335.0 270.0 3197968.0 695124.0 ... 9.5 3131822.0 \n", - "3 354.0 271.0 3198011.0 695175.0 ... 10.5 3132179.0 \n", - "4 246.0 212.0 3198056.0 695226.0 ... 9.6 3132501.0 \n", + " value_S05 value_S06 value_S07 value_S08 value_S09 ... 
\\\n", + "0 286.500000 314.000000 243.166667 3.197980e+06 695143.166667 ... \n", + "1 206.833333 235.833333 186.666667 3.198221e+06 695403.666667 ... \n", + "2 233.666667 271.166667 216.333333 3.198448e+06 695656.500000 ... \n", + "3 211.666667 226.666667 181.000000 3.198691e+06 695911.333333 ... \n", + "4 280.666667 308.833333 271.833333 3.198978e+06 696195.833333 ... \n", "\n", - " value_S19 value_S20 value_S21 value_S22 value_S23 value_S24 \\\n", - "0 55.0 55.0 47.0 58.0 45.0 58.0 \n", - "1 58.0 63.0 62.0 67.0 55.0 61.0 \n", - "2 68.0 61.0 67.0 66.0 46.0 55.0 \n", - "3 43.0 51.0 53.0 62.0 53.0 60.0 \n", - "4 45.0 51.0 54.0 59.0 43.0 41.0 \n", + " value_S17 value_S18 value_S19 value_S20 value_S21 value_S22 \\\n", + "0 10.383333 3.131958e+06 52.666667 54.333333 56.166667 61.000000 \n", + "1 8.666667 3.133668e+06 33.166667 37.000000 36.166667 43.666667 \n", + "2 8.833333 3.135413e+06 41.500000 45.666667 46.500000 49.666667 \n", + "3 8.433333 3.137001e+06 42.333333 42.833333 40.500000 44.166667 \n", + "4 9.083333 3.138843e+06 50.500000 51.166667 55.500000 53.666667 \n", "\n", - " value_S25 value_S26 \n", - "0 47.0 356.0 \n", - "1 42.0 400.0 \n", - "2 45.0 402.0 \n", - "3 45.0 357.0 \n", - "4 36.0 322.0 \n", + " value_S23 value_S24 value_S25 value_S26 \n", + "0 47.666667 52.666667 40.833333 357.333333 \n", + "1 34.500000 39.333333 31.166667 249.666667 \n", + "2 39.333333 45.500000 36.166667 297.666667 \n", + "3 35.333333 37.833333 30.333333 268.000000 \n", + "4 46.166667 49.666667 41.166667 341.833333 \n", "\n", "[5 rows x 27 columns]" ] }, - "execution_count": 13, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -826,17 +943,17 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ - "step = 2\n", + "step = 3\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 18, "metadata": {}, "outputs": [ { @@ -845,7 +962,7 @@ "dict_keys(['readings', 'turbines', 'turbine_id', 'X', 'y', 'timestamp'])" ] }, - "execution_count": 15, + "execution_count": 18, "metadata": {}, "output_type": "execute_result" } @@ -856,21 +973,21 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 2013-01-10 00:00:00\n", - "1 2013-01-10 00:10:00\n", - "2 2013-01-10 00:20:00\n", - "3 2013-01-10 00:30:00\n", - "4 2013-01-10 00:40:00\n", + "1 2013-01-10 01:00:00\n", + "2 2013-01-10 02:00:00\n", + "3 2013-01-10 03:00:00\n", + "4 2013-01-10 04:00:00\n", "Name: timestamp, dtype: datetime64[ns]" ] }, - "execution_count": 16, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -881,7 +998,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 20, "metadata": {}, "outputs": [ { @@ -931,123 +1048,123 @@ " \n", " \n", " 0\n", - " 323.0\n", - " 320.0\n", - " 284.0\n", - " 348.0\n", - " 273.0\n", - " 342.0\n", - " 280.0\n", - " 3197842.0\n", - " 695000.0\n", - " 3348234.0\n", + " 313.333333\n", + " 323.833333\n", + " 336.000000\n", + " 364.666667\n", + " 286.500000\n", + " 314.000000\n", + " 243.166667\n", + " 3.197980e+06\n", + " 695143.166667\n", + " 3.348384e+06\n", " ...\n", - " 11.7\n", - " 3131020.0\n", - " 55.0\n", - " 55.0\n", - " 47.0\n", - " 58.0\n", - " 45.0\n", - " 58.0\n", - " 47.0\n", - " 356.0\n", + " 10.383333\n", + " 3.131958e+06\n", + " 52.666667\n", + " 54.333333\n", + " 56.166667\n", + " 61.000000\n", + " 
47.666667\n", + " 52.666667\n", + " 40.833333\n", + " 357.333333\n", " \n", " \n", " 1\n", - " 346.0\n", - " 384.0\n", - " 367.0\n", - " 411.0\n", - " 331.0\n", - " 360.0\n", - " 249.0\n", - " 3197900.0\n", - " 695063.0\n", - " 3348296.0\n", + " 197.500000\n", + " 221.333333\n", + " 216.000000\n", + " 260.666667\n", + " 206.833333\n", + " 235.833333\n", + " 186.666667\n", + " 3.198221e+06\n", + " 695403.666667\n", + " 3.348651e+06\n", " ...\n", - " 10.2\n", - " 3131420.0\n", - " 58.0\n", - " 63.0\n", - " 62.0\n", - " 67.0\n", - " 55.0\n", - " 61.0\n", - " 42.0\n", - " 400.0\n", + " 8.666667\n", + " 3.133668e+06\n", + " 33.166667\n", + " 37.000000\n", + " 36.166667\n", + " 43.666667\n", + " 34.500000\n", + " 39.333333\n", + " 31.166667\n", + " 249.666667\n", " \n", " \n", " 2\n", - " 407.0\n", - " 363.0\n", - " 407.0\n", - " 393.0\n", - " 275.0\n", - " 335.0\n", - " 270.0\n", - " 3197968.0\n", - " 695124.0\n", - " 3348363.0\n", + " 248.166667\n", + " 271.666667\n", + " 277.500000\n", + " 298.000000\n", + " 233.666667\n", + " 271.166667\n", + " 216.333333\n", + " 3.198448e+06\n", + " 695656.500000\n", + " 3.348910e+06\n", " ...\n", - " 9.5\n", - " 3131822.0\n", - " 68.0\n", - " 61.0\n", - " 67.0\n", - " 66.0\n", - " 46.0\n", - " 55.0\n", - " 45.0\n", - " 402.0\n", + " 8.833333\n", + " 3.135413e+06\n", + " 41.500000\n", + " 45.666667\n", + " 46.500000\n", + " 49.666667\n", + " 39.333333\n", + " 45.500000\n", + " 36.166667\n", + " 297.666667\n", " \n", " \n", " 3\n", - " 257.0\n", - " 307.0\n", - " 315.0\n", - " 361.0\n", - " 317.0\n", - " 354.0\n", - " 271.0\n", - " 3198011.0\n", - " 695175.0\n", - " 3348416.0\n", + " 253.166667\n", + " 256.166667\n", + " 242.666667\n", + " 265.333333\n", + " 211.666667\n", + " 226.666667\n", + " 181.000000\n", + " 3.198691e+06\n", + " 695911.333333\n", + " 3.349157e+06\n", " ...\n", - " 10.5\n", - " 3132179.0\n", - " 43.0\n", - " 51.0\n", - " 53.0\n", - " 62.0\n", - " 53.0\n", - " 60.0\n", - " 45.0\n", - " 357.0\n", + " 8.433333\n", + " 3.137001e+06\n", + " 42.333333\n", + " 42.833333\n", + " 40.500000\n", + " 44.166667\n", + " 35.333333\n", + " 37.833333\n", + " 30.333333\n", + " 268.000000\n", " \n", " \n", " 4\n", - " 267.0\n", - " 309.0\n", - " 314.0\n", - " 355.0\n", - " 262.0\n", - " 246.0\n", - " 212.0\n", - " 3198056.0\n", - " 695226.0\n", - " 3348470.0\n", + " 305.000000\n", + " 312.333333\n", + " 346.166667\n", + " 329.833333\n", + " 280.666667\n", + " 308.833333\n", + " 271.833333\n", + " 3.198978e+06\n", + " 696195.833333\n", + " 3.349452e+06\n", " ...\n", - " 9.6\n", - " 3132501.0\n", - " 45.0\n", - " 51.0\n", - " 54.0\n", - " 59.0\n", - " 43.0\n", - " 41.0\n", - " 36.0\n", - " 322.0\n", + " 9.083333\n", + " 3.138843e+06\n", + " 50.500000\n", + " 51.166667\n", + " 55.500000\n", + " 53.666667\n", + " 46.166667\n", + " 49.666667\n", + " 41.166667\n", + " 341.833333\n", " \n", " \n", "\n", @@ -1055,38 +1172,38 @@ "" ], "text/plain": [ - " value_S01 value_S02 value_S03 value_S04 value_S05 value_S06 \\\n", - "0 323.0 320.0 284.0 348.0 273.0 342.0 \n", - "1 346.0 384.0 367.0 411.0 331.0 360.0 \n", - "2 407.0 363.0 407.0 393.0 275.0 335.0 \n", - "3 257.0 307.0 315.0 361.0 317.0 354.0 \n", - "4 267.0 309.0 314.0 355.0 262.0 246.0 \n", + " value_S01 value_S02 value_S03 value_S04 value_S05 value_S06 \\\n", + "0 313.333333 323.833333 336.000000 364.666667 286.500000 314.000000 \n", + "1 197.500000 221.333333 216.000000 260.666667 206.833333 235.833333 \n", + "2 248.166667 271.666667 277.500000 298.000000 233.666667 271.166667 \n", + "3 253.166667 
256.166667 242.666667 265.333333 211.666667 226.666667 \n", + "4 305.000000 312.333333 346.166667 329.833333 280.666667 308.833333 \n", "\n", - " value_S07 value_S08 value_S09 value_S10 ... value_S17 value_S18 \\\n", - "0 280.0 3197842.0 695000.0 3348234.0 ... 11.7 3131020.0 \n", - "1 249.0 3197900.0 695063.0 3348296.0 ... 10.2 3131420.0 \n", - "2 270.0 3197968.0 695124.0 3348363.0 ... 9.5 3131822.0 \n", - "3 271.0 3198011.0 695175.0 3348416.0 ... 10.5 3132179.0 \n", - "4 212.0 3198056.0 695226.0 3348470.0 ... 9.6 3132501.0 \n", + " value_S07 value_S08 value_S09 value_S10 ... value_S17 \\\n", + "0 243.166667 3.197980e+06 695143.166667 3.348384e+06 ... 10.383333 \n", + "1 186.666667 3.198221e+06 695403.666667 3.348651e+06 ... 8.666667 \n", + "2 216.333333 3.198448e+06 695656.500000 3.348910e+06 ... 8.833333 \n", + "3 181.000000 3.198691e+06 695911.333333 3.349157e+06 ... 8.433333 \n", + "4 271.833333 3.198978e+06 696195.833333 3.349452e+06 ... 9.083333 \n", "\n", - " value_S19 value_S20 value_S21 value_S22 value_S23 value_S24 \\\n", - "0 55.0 55.0 47.0 58.0 45.0 58.0 \n", - "1 58.0 63.0 62.0 67.0 55.0 61.0 \n", - "2 68.0 61.0 67.0 66.0 46.0 55.0 \n", - "3 43.0 51.0 53.0 62.0 53.0 60.0 \n", - "4 45.0 51.0 54.0 59.0 43.0 41.0 \n", + " value_S18 value_S19 value_S20 value_S21 value_S22 value_S23 \\\n", + "0 3.131958e+06 52.666667 54.333333 56.166667 61.000000 47.666667 \n", + "1 3.133668e+06 33.166667 37.000000 36.166667 43.666667 34.500000 \n", + "2 3.135413e+06 41.500000 45.666667 46.500000 49.666667 39.333333 \n", + "3 3.137001e+06 42.333333 42.833333 40.500000 44.166667 35.333333 \n", + "4 3.138843e+06 50.500000 51.166667 55.500000 53.666667 46.166667 \n", "\n", - " value_S25 value_S26 \n", - "0 47.0 356.0 \n", - "1 42.0 400.0 \n", - "2 45.0 402.0 \n", - "3 45.0 357.0 \n", - "4 36.0 322.0 \n", + " value_S24 value_S25 value_S26 \n", + "0 52.666667 40.833333 357.333333 \n", + "1 39.333333 31.166667 249.666667 \n", + "2 45.500000 36.166667 297.666667 \n", + "3 37.833333 30.333333 268.000000 \n", + "4 49.666667 41.166667 341.833333 \n", "\n", "[5 rows x 26 columns]" ] }, - "execution_count": 17, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -1108,17 +1225,26 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 21, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/sarah/anaconda3/envs/draco/lib/python3.8/site-packages/sklearn/impute/_base.py:356: FutureWarning: The 'verbose' parameter was deprecated in version 1.1 and will be removed in 1.3. 
A warning will always be raised upon the removal of empty columns in the future version.\n", + " warnings.warn(\n" + ] + } + ], "source": [ - "step = 3\n", + "step = 4\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 22, "metadata": {}, "outputs": [ { @@ -1127,7 +1253,7 @@ "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" ] }, - "execution_count": 19, + "execution_count": 22, "metadata": {}, "output_type": "execute_result" } @@ -1138,50 +1264,50 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([[3.230000e+02, 3.200000e+02, 2.840000e+02, 3.480000e+02,\n", - " 2.730000e+02, 3.420000e+02, 2.800000e+02, 3.197842e+06,\n", - " 6.950000e+05, 3.348234e+06, 3.436762e+06, 3.322362e+06,\n", - " 3.357952e+06, 3.223797e+06, 8.300000e+00, 6.000000e+00,\n", - " 1.170000e+01, 3.131020e+06, 5.500000e+01, 5.500000e+01,\n", - " 4.700000e+01, 5.800000e+01, 4.500000e+01, 5.800000e+01,\n", - " 4.700000e+01, 3.560000e+02],\n", - " [3.460000e+02, 3.840000e+02, 3.670000e+02, 4.110000e+02,\n", - " 3.310000e+02, 3.600000e+02, 2.490000e+02, 3.197900e+06,\n", - " 6.950630e+05, 3.348296e+06, 3.436829e+06, 3.322417e+06,\n", - " 3.358013e+06, 3.223839e+06, 7.600000e+00, 5.000000e+00,\n", - " 1.020000e+01, 3.131420e+06, 5.800000e+01, 6.300000e+01,\n", - " 6.200000e+01, 6.700000e+01, 5.500000e+01, 6.100000e+01,\n", - " 4.200000e+01, 4.000000e+02],\n", - " [4.070000e+02, 3.630000e+02, 4.070000e+02, 3.930000e+02,\n", - " 2.750000e+02, 3.350000e+02, 2.700000e+02, 3.197968e+06,\n", - " 6.951240e+05, 3.348363e+06, 3.436895e+06, 3.322463e+06,\n", - " 3.358068e+06, 3.223884e+06, 7.800000e+00, 5.700000e+00,\n", - " 9.500000e+00, 3.131822e+06, 6.800000e+01, 6.100000e+01,\n", - " 6.700000e+01, 6.600000e+01, 4.600000e+01, 5.500000e+01,\n", - " 4.500000e+01, 4.020000e+02],\n", - " [2.570000e+02, 3.070000e+02, 3.150000e+02, 3.610000e+02,\n", - " 3.170000e+02, 3.540000e+02, 2.710000e+02, 3.198011e+06,\n", - " 6.951750e+05, 3.348416e+06, 3.436957e+06, 3.322516e+06,\n", - " 3.358128e+06, 3.223929e+06, 8.600000e+00, 6.600000e+00,\n", - " 1.050000e+01, 3.132179e+06, 4.300000e+01, 5.100000e+01,\n", - " 5.300000e+01, 6.200000e+01, 5.300000e+01, 6.000000e+01,\n", - " 4.500000e+01, 3.570000e+02],\n", - " [2.670000e+02, 3.090000e+02, 3.140000e+02, 3.550000e+02,\n", - " 2.620000e+02, 2.460000e+02, 2.120000e+02, 3.198056e+06,\n", - " 6.952260e+05, 3.348470e+06, 3.437016e+06, 3.322559e+06,\n", - " 3.358169e+06, 3.223965e+06, 7.500000e+00, 5.900000e+00,\n", - " 9.600000e+00, 3.132501e+06, 4.500000e+01, 5.100000e+01,\n", - " 5.400000e+01, 5.900000e+01, 4.300000e+01, 4.100000e+01,\n", - " 3.600000e+01, 3.220000e+02]])" + "array([[3.13333333e+02, 3.23833333e+02, 3.36000000e+02, 3.64666667e+02,\n", + " 2.86500000e+02, 3.14000000e+02, 2.43166667e+02, 3.19798000e+06,\n", + " 6.95143167e+05, 3.34838383e+06, 3.43692150e+06, 3.32248667e+06,\n", + " 3.35809000e+06, 3.22390150e+06, 7.95000000e+00, 5.85000000e+00,\n", + " 1.03833333e+01, 3.13195833e+06, 5.26666667e+01, 5.43333333e+01,\n", + " 5.61666667e+01, 6.10000000e+01, 4.76666667e+01, 5.26666667e+01,\n", + " 4.08333333e+01, 3.57333333e+02],\n", + " [1.97500000e+02, 2.21333333e+02, 2.16000000e+02, 2.60666667e+02,\n", + " 2.06833333e+02, 2.35833333e+02, 1.86666667e+02, 3.19822067e+06,\n", + " 6.95403667e+05, 3.34865117e+06, 3.43722283e+06, 3.32272200e+06,\n", + " 3.35834000e+06, 3.22409567e+06, 
6.83333333e+00, 5.15000000e+00,\n", + " 8.66666667e+00, 3.13366817e+06, 3.31666667e+01, 3.70000000e+01,\n", + " 3.61666667e+01, 4.36666667e+01, 3.45000000e+01, 3.93333333e+01,\n", + " 3.11666667e+01, 2.49666667e+02],\n", + " [2.48166667e+02, 2.71666667e+02, 2.77500000e+02, 2.98000000e+02,\n", + " 2.33666667e+02, 2.71166667e+02, 2.16333333e+02, 3.19844767e+06,\n", + " 6.95656500e+05, 3.34890967e+06, 3.43751900e+06, 3.32295950e+06,\n", + " 3.35862067e+06, 3.22432333e+06, 7.11666667e+00, 5.56666667e+00,\n", + " 8.83333333e+00, 3.13541283e+06, 4.15000000e+01, 4.56666667e+01,\n", + " 4.65000000e+01, 4.96666667e+01, 3.93333333e+01, 4.55000000e+01,\n", + " 3.61666667e+01, 2.97666667e+02],\n", + " [2.53166667e+02, 2.56166667e+02, 2.42666667e+02, 2.65333333e+02,\n", + " 2.11666667e+02, 2.26666667e+02, 1.81000000e+02, 3.19869117e+06,\n", + " 6.95911333e+05, 3.34915717e+06, 3.43778050e+06, 3.32316850e+06,\n", + " 3.35884883e+06, 3.22450217e+06, 6.71666667e+00, 5.16666667e+00,\n", + " 8.43333333e+00, 3.13700133e+06, 4.23333333e+01, 4.28333333e+01,\n", + " 4.05000000e+01, 4.41666667e+01, 3.53333333e+01, 3.78333333e+01,\n", + " 3.03333333e+01, 2.68000000e+02],\n", + " [3.05000000e+02, 3.12333333e+02, 3.46166667e+02, 3.29833333e+02,\n", + " 2.80666667e+02, 3.08833333e+02, 2.71833333e+02, 3.19897850e+06,\n", + " 6.96195833e+05, 3.34945200e+06, 3.43807767e+06, 3.32340933e+06,\n", + " 3.35910983e+06, 3.22471400e+06, 7.20000000e+00, 5.28333333e+00,\n", + " 9.08333333e+00, 3.13884333e+06, 5.05000000e+01, 5.11666667e+01,\n", + " 5.55000000e+01, 5.36666667e+01, 4.61666667e+01, 4.96666667e+01,\n", + " 4.11666667e+01, 3.41833333e+02]])" ] }, - "execution_count": 20, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -1203,17 +1329,17 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ - "step = 4\n", + "step = 5\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 25, "metadata": {}, "outputs": [ { @@ -1222,7 +1348,7 @@ "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" ] }, - "execution_count": 22, + "execution_count": 25, "metadata": {}, "output_type": "execute_result" } @@ -1233,45 +1359,45 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "array([[-0.23563892, -0.24267292, -0.3286385 , -0.17702227, -0.35287222,\n", - " -0.19248826, -0.3317757 , -1. , -1. , -1. ,\n", - " -1. , -1. , -1. , -1. 
, -0.11702128,\n", - " -0.24050633, -0.25714286, -0.37378787, -0.22758621, -0.22758621,\n", - " -0.31972789, -0.1862069 , -0.36986301, -0.1862069 , -0.33793103,\n", - " -0.26141079],\n", - " [-0.18171161, -0.0926143 , -0.13380282, -0.02930832, -0.21688159,\n", - " -0.15023474, -0.40420561, -0.99995911, -0.99995779, -0.99995941,\n", - " -0.99995718, -0.99996326, -0.99996042, -0.99997164, -0.19148936,\n", - " -0.36708861, -0.35238095, -0.37370786, -0.1862069 , -0.11724138,\n", - " -0.11564626, -0.06206897, -0.23287671, -0.14482759, -0.40689655,\n", - " -0.17012448],\n", - " [-0.03868699, -0.14185229, -0.0399061 , -0.07151231, -0.34818288,\n", - " -0.20892019, -0.35514019, -0.99991116, -0.99991693, -0.99991555,\n", - " -0.999915 , -0.99993254, -0.99992474, -0.99994125, -0.17021277,\n", - " -0.27848101, -0.3968254 , -0.37362746, -0.04827586, -0.14482759,\n", - " -0.04761905, -0.07586207, -0.35616438, -0.22758621, -0.36551724,\n", - " -0.1659751 ],\n", - " [-0.39038687, -0.27315358, -0.25586854, -0.14654162, -0.24970692,\n", - " -0.16431925, -0.35280374, -0.99988085, -0.99988276, -0.99988086,\n", - " -0.99987538, -0.99989714, -0.99988581, -0.99991086, -0.08510638,\n", - " -0.16455696, -0.33333333, -0.37355606, -0.39310345, -0.28275862,\n", - " -0.23809524, -0.13103448, -0.26027397, -0.15862069, -0.36551724,\n", - " -0.2593361 ],\n", - " [-0.36694021, -0.26846424, -0.25821596, -0.16060961, -0.37866354,\n", - " -0.41784038, -0.49065421, -0.99984912, -0.99984859, -0.99984551,\n", - " -0.99983767, -0.99986841, -0.99985921, -0.99988655, -0.20212766,\n", - " -0.25316456, -0.39047619, -0.37349166, -0.36551724, -0.28275862,\n", - " -0.2244898 , -0.17241379, -0.39726027, -0.42068966, -0.48965517,\n", - " -0.33195021]])" + "array([[-0.26126126, -0.23706897, -0.20870076, -0.14106583, -0.32328767,\n", + " -0.25969448, -0.42198789, -1. , -1. , -1. ,\n", + " -1. , -1. , -1. , -1. 
, -0.11007463,\n", + " -0.16824645, -0.10424155, -0.37397741, -0.25233645, -0.22716628,\n", + " -0.20140515, -0.13481829, -0.32239156, -0.25380117, -0.4182243 ,\n", + " -0.25697453],\n", + " [-0.53349001, -0.47805643, -0.49088771, -0.38557994, -0.51037182,\n", + " -0.44339992, -0.55438391, -0.99983031, -0.99982547, -0.99982499,\n", + " -0.99980741, -0.9998428 , -0.99983779, -0.99986887, -0.23507463,\n", + " -0.26777251, -0.25233645, -0.37363511, -0.52570093, -0.470726 ,\n", + " -0.4824356 , -0.37866354, -0.50762016, -0.44093567, -0.55373832,\n", + " -0.48085254],\n", + " [-0.41441441, -0.35971787, -0.3462669 , -0.29780564, -0.44735812,\n", + " -0.36036036, -0.48486624, -0.99967026, -0.99965608, -0.99965576,\n", + " -0.99961813, -0.99968416, -0.99965569, -0.99971512, -0.20335821,\n", + " -0.20853081, -0.2379583 , -0.37328583, -0.4088785 , -0.34894614,\n", + " -0.33723653, -0.29425557, -0.43962485, -0.35438596, -0.48364486,\n", + " -0.38104315],\n", + " [-0.40266353, -0.39615987, -0.4281795 , -0.37460815, -0.49902153,\n", + " -0.4649432 , -0.56766257, -0.99949857, -0.99948535, -0.99949373,\n", + " -0.999451 , -0.99954455, -0.99950765, -0.99959435, -0.24813433,\n", + " -0.26540284, -0.27246585, -0.37296782, -0.39719626, -0.38875878,\n", + " -0.42154567, -0.37162954, -0.49589683, -0.4619883 , -0.56542056,\n", + " -0.4427309 ],\n", + " [-0.28084606, -0.26410658, -0.18479326, -0.22296238, -0.3369863 ,\n", + " -0.27183705, -0.35481351, -0.99929598, -0.99929474, -0.99930071,\n", + " -0.99926107, -0.99938368, -0.99933831, -0.9994513 , -0.19402985,\n", + " -0.24881517, -0.21639109, -0.37259906, -0.28271028, -0.27166276,\n", + " -0.21077283, -0.23798359, -0.34349355, -0.29590643, -0.4135514 ,\n", + " -0.28920464]])" ] }, - "execution_count": 23, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } @@ -1293,17 +1419,17 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ - "step = 5\n", + "step = 6\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 28, "metadata": {}, "outputs": [ { @@ -1312,7 +1438,7 @@ "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" ] }, - "execution_count": 25, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } @@ -1323,7 +1449,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": 29, "metadata": {}, "outputs": [ { @@ -1373,123 +1499,123 @@ " \n", " \n", " 0\n", - " -0.235639\n", - " -0.242673\n", - " -0.328638\n", - " -0.177022\n", - " -0.352872\n", - " -0.192488\n", - " -0.331776\n", + " -0.261261\n", + " -0.237069\n", + " -0.208701\n", + " -0.141066\n", + " -0.323288\n", + " -0.259694\n", + " -0.421988\n", " -1.000000\n", " -1.000000\n", " -1.000000\n", " ...\n", - " -0.257143\n", - " -0.373788\n", - " -0.227586\n", - " -0.227586\n", - " -0.319728\n", - " -0.186207\n", - " -0.369863\n", - " -0.186207\n", - " -0.337931\n", - " -0.261411\n", + " -0.104242\n", + " -0.373977\n", + " -0.252336\n", + " -0.227166\n", + " -0.201405\n", + " -0.134818\n", + " -0.322392\n", + " -0.253801\n", + " -0.418224\n", + " -0.256975\n", " \n", " \n", " 1\n", - " -0.181712\n", - " -0.092614\n", - " -0.133803\n", - " -0.029308\n", - " -0.216882\n", - " -0.150235\n", - " -0.404206\n", - " -0.999959\n", - " -0.999958\n", - " -0.999959\n", + " -0.533490\n", + " -0.478056\n", + " -0.490888\n", + " -0.385580\n", + " -0.510372\n", + " -0.443400\n", + " 
-0.554384\n", + " -0.999830\n", + " -0.999825\n", + " -0.999825\n", " ...\n", - " -0.352381\n", - " -0.373708\n", - " -0.186207\n", - " -0.117241\n", - " -0.115646\n", - " -0.062069\n", - " -0.232877\n", - " -0.144828\n", - " -0.406897\n", - " -0.170124\n", + " -0.252336\n", + " -0.373635\n", + " -0.525701\n", + " -0.470726\n", + " -0.482436\n", + " -0.378664\n", + " -0.507620\n", + " -0.440936\n", + " -0.553738\n", + " -0.480853\n", " \n", " \n", " 2\n", - " -0.038687\n", - " -0.141852\n", - " -0.039906\n", - " -0.071512\n", - " -0.348183\n", - " -0.208920\n", - " -0.355140\n", - " -0.999911\n", - " -0.999917\n", - " -0.999916\n", + " -0.414414\n", + " -0.359718\n", + " -0.346267\n", + " -0.297806\n", + " -0.447358\n", + " -0.360360\n", + " -0.484866\n", + " -0.999670\n", + " -0.999656\n", + " -0.999656\n", " ...\n", - " -0.396825\n", - " -0.373627\n", - " -0.048276\n", - " -0.144828\n", - " -0.047619\n", - " -0.075862\n", - " -0.356164\n", - " -0.227586\n", - " -0.365517\n", - " -0.165975\n", + " -0.237958\n", + " -0.373286\n", + " -0.408879\n", + " -0.348946\n", + " -0.337237\n", + " -0.294256\n", + " -0.439625\n", + " -0.354386\n", + " -0.483645\n", + " -0.381043\n", " \n", " \n", " 3\n", - " -0.390387\n", - " -0.273154\n", - " -0.255869\n", - " -0.146542\n", - " -0.249707\n", - " -0.164319\n", - " -0.352804\n", - " -0.999881\n", - " -0.999883\n", - " -0.999881\n", + " -0.402664\n", + " -0.396160\n", + " -0.428180\n", + " -0.374608\n", + " -0.499022\n", + " -0.464943\n", + " -0.567663\n", + " -0.999499\n", + " -0.999485\n", + " -0.999494\n", " ...\n", - " -0.333333\n", - " -0.373556\n", - " -0.393103\n", - " -0.282759\n", - " -0.238095\n", - " -0.131034\n", - " -0.260274\n", - " -0.158621\n", - " -0.365517\n", - " -0.259336\n", + " -0.272466\n", + " -0.372968\n", + " -0.397196\n", + " -0.388759\n", + " -0.421546\n", + " -0.371630\n", + " -0.495897\n", + " -0.461988\n", + " -0.565421\n", + " -0.442731\n", " \n", " \n", " 4\n", - " -0.366940\n", - " -0.268464\n", - " -0.258216\n", - " -0.160610\n", - " -0.378664\n", - " -0.417840\n", - " -0.490654\n", - " -0.999849\n", - " -0.999849\n", - " -0.999846\n", + " -0.280846\n", + " -0.264107\n", + " -0.184793\n", + " -0.222962\n", + " -0.336986\n", + " -0.271837\n", + " -0.354814\n", + " -0.999296\n", + " -0.999295\n", + " -0.999301\n", " ...\n", - " -0.390476\n", - " -0.373492\n", - " -0.365517\n", - " -0.282759\n", - " -0.224490\n", - " -0.172414\n", - " -0.397260\n", - " -0.420690\n", - " -0.489655\n", - " -0.331950\n", + " -0.216391\n", + " -0.372599\n", + " -0.282710\n", + " -0.271663\n", + " -0.210773\n", + " -0.237984\n", + " -0.343494\n", + " -0.295906\n", + " -0.413551\n", + " -0.289205\n", " \n", " \n", "\n", @@ -1498,30 +1624,30 @@ ], "text/plain": [ " 0 1 2 3 4 5 6 \\\n", - "0 -0.235639 -0.242673 -0.328638 -0.177022 -0.352872 -0.192488 -0.331776 \n", - "1 -0.181712 -0.092614 -0.133803 -0.029308 -0.216882 -0.150235 -0.404206 \n", - "2 -0.038687 -0.141852 -0.039906 -0.071512 -0.348183 -0.208920 -0.355140 \n", - "3 -0.390387 -0.273154 -0.255869 -0.146542 -0.249707 -0.164319 -0.352804 \n", - "4 -0.366940 -0.268464 -0.258216 -0.160610 -0.378664 -0.417840 -0.490654 \n", + "0 -0.261261 -0.237069 -0.208701 -0.141066 -0.323288 -0.259694 -0.421988 \n", + "1 -0.533490 -0.478056 -0.490888 -0.385580 -0.510372 -0.443400 -0.554384 \n", + "2 -0.414414 -0.359718 -0.346267 -0.297806 -0.447358 -0.360360 -0.484866 \n", + "3 -0.402664 -0.396160 -0.428180 -0.374608 -0.499022 -0.464943 -0.567663 \n", + "4 -0.280846 -0.264107 -0.184793 -0.222962 -0.336986 
-0.271837 -0.354814 \n", "\n", " 7 8 9 ... 16 17 18 19 \\\n", - "0 -1.000000 -1.000000 -1.000000 ... -0.257143 -0.373788 -0.227586 -0.227586 \n", - "1 -0.999959 -0.999958 -0.999959 ... -0.352381 -0.373708 -0.186207 -0.117241 \n", - "2 -0.999911 -0.999917 -0.999916 ... -0.396825 -0.373627 -0.048276 -0.144828 \n", - "3 -0.999881 -0.999883 -0.999881 ... -0.333333 -0.373556 -0.393103 -0.282759 \n", - "4 -0.999849 -0.999849 -0.999846 ... -0.390476 -0.373492 -0.365517 -0.282759 \n", + "0 -1.000000 -1.000000 -1.000000 ... -0.104242 -0.373977 -0.252336 -0.227166 \n", + "1 -0.999830 -0.999825 -0.999825 ... -0.252336 -0.373635 -0.525701 -0.470726 \n", + "2 -0.999670 -0.999656 -0.999656 ... -0.237958 -0.373286 -0.408879 -0.348946 \n", + "3 -0.999499 -0.999485 -0.999494 ... -0.272466 -0.372968 -0.397196 -0.388759 \n", + "4 -0.999296 -0.999295 -0.999301 ... -0.216391 -0.372599 -0.282710 -0.271663 \n", "\n", " 20 21 22 23 24 25 \n", - "0 -0.319728 -0.186207 -0.369863 -0.186207 -0.337931 -0.261411 \n", - "1 -0.115646 -0.062069 -0.232877 -0.144828 -0.406897 -0.170124 \n", - "2 -0.047619 -0.075862 -0.356164 -0.227586 -0.365517 -0.165975 \n", - "3 -0.238095 -0.131034 -0.260274 -0.158621 -0.365517 -0.259336 \n", - "4 -0.224490 -0.172414 -0.397260 -0.420690 -0.489655 -0.331950 \n", + "0 -0.201405 -0.134818 -0.322392 -0.253801 -0.418224 -0.256975 \n", + "1 -0.482436 -0.378664 -0.507620 -0.440936 -0.553738 -0.480853 \n", + "2 -0.337237 -0.294256 -0.439625 -0.354386 -0.483645 -0.381043 \n", + "3 -0.421546 -0.371630 -0.495897 -0.461988 -0.565421 -0.442731 \n", + "4 -0.210773 -0.237984 -0.343494 -0.295906 -0.413551 -0.289205 \n", "\n", "[5 rows x 26 columns]" ] }, - "execution_count": 26, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -1543,17 +1669,17 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 30, "metadata": {}, "outputs": [], "source": [ - "step = 6\n", + "step = 7\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 31, "metadata": {}, "outputs": [ { @@ -1562,7 +1688,7 @@ "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" ] }, - "execution_count": 28, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } @@ -1573,7 +1699,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 32, "metadata": {}, "outputs": [ { @@ -1623,122 +1749,122 @@ " \n", " \n", " 0\n", - " -0.235639\n", - " -0.242673\n", - " -0.328638\n", - " -0.177022\n", - " -0.352872\n", - " -0.192488\n", - " -0.331776\n", + " -0.261261\n", + " -0.237069\n", + " -0.208701\n", + " -0.141066\n", + " -0.323288\n", + " -0.259694\n", + " -0.421988\n", " -1.000000\n", " -1.000000\n", " -1.000000\n", " ...\n", - " -0.373788\n", - " -0.227586\n", - " -0.227586\n", - " -0.319728\n", - " -0.186207\n", - " -0.369863\n", - " -0.186207\n", - " -0.337931\n", - " -0.261411\n", + " -0.373977\n", + " -0.252336\n", + " -0.227166\n", + " -0.201405\n", + " -0.134818\n", + " -0.322392\n", + " -0.253801\n", + " -0.418224\n", + " -0.256975\n", " T001\n", " \n", " \n", " 1\n", - " -0.181712\n", - " -0.092614\n", - " -0.133803\n", - " -0.029308\n", - " -0.216882\n", - " -0.150235\n", - " -0.404206\n", - " -0.999959\n", - " -0.999958\n", - " -0.999959\n", + " -0.533490\n", + " -0.478056\n", + " -0.490888\n", + " -0.385580\n", + " -0.510372\n", + " -0.443400\n", + " -0.554384\n", + " -0.999830\n", + " -0.999825\n", + " -0.999825\n", " ...\n", - " -0.373708\n", - " 
-0.186207\n", - " -0.117241\n", - " -0.115646\n", - " -0.062069\n", - " -0.232877\n", - " -0.144828\n", - " -0.406897\n", - " -0.170124\n", + " -0.373635\n", + " -0.525701\n", + " -0.470726\n", + " -0.482436\n", + " -0.378664\n", + " -0.507620\n", + " -0.440936\n", + " -0.553738\n", + " -0.480853\n", " T001\n", " \n", " \n", " 2\n", - " -0.038687\n", - " -0.141852\n", - " -0.039906\n", - " -0.071512\n", - " -0.348183\n", - " -0.208920\n", - " -0.355140\n", - " -0.999911\n", - " -0.999917\n", - " -0.999916\n", + " -0.414414\n", + " -0.359718\n", + " -0.346267\n", + " -0.297806\n", + " -0.447358\n", + " -0.360360\n", + " -0.484866\n", + " -0.999670\n", + " -0.999656\n", + " -0.999656\n", " ...\n", - " -0.373627\n", - " -0.048276\n", - " -0.144828\n", - " -0.047619\n", - " -0.075862\n", - " -0.356164\n", - " -0.227586\n", - " -0.365517\n", - " -0.165975\n", + " -0.373286\n", + " -0.408879\n", + " -0.348946\n", + " -0.337237\n", + " -0.294256\n", + " -0.439625\n", + " -0.354386\n", + " -0.483645\n", + " -0.381043\n", " T001\n", " \n", " \n", " 3\n", - " -0.390387\n", - " -0.273154\n", - " -0.255869\n", - " -0.146542\n", - " -0.249707\n", - " -0.164319\n", - " -0.352804\n", - " -0.999881\n", - " -0.999883\n", - " -0.999881\n", + " -0.402664\n", + " -0.396160\n", + " -0.428180\n", + " -0.374608\n", + " -0.499022\n", + " -0.464943\n", + " -0.567663\n", + " -0.999499\n", + " -0.999485\n", + " -0.999494\n", " ...\n", - " -0.373556\n", - " -0.393103\n", - " -0.282759\n", - " -0.238095\n", - " -0.131034\n", - " -0.260274\n", - " -0.158621\n", - " -0.365517\n", - " -0.259336\n", + " -0.372968\n", + " -0.397196\n", + " -0.388759\n", + " -0.421546\n", + " -0.371630\n", + " -0.495897\n", + " -0.461988\n", + " -0.565421\n", + " -0.442731\n", " T001\n", " \n", " \n", " 4\n", - " -0.366940\n", - " -0.268464\n", - " -0.258216\n", - " -0.160610\n", - " -0.378664\n", - " -0.417840\n", - " -0.490654\n", - " -0.999849\n", - " -0.999849\n", - " -0.999846\n", + " -0.280846\n", + " -0.264107\n", + " -0.184793\n", + " -0.222962\n", + " -0.336986\n", + " -0.271837\n", + " -0.354814\n", + " -0.999296\n", + " -0.999295\n", + " -0.999301\n", " ...\n", - " -0.373492\n", - " -0.365517\n", - " -0.282759\n", - " -0.224490\n", - " -0.172414\n", - " -0.397260\n", - " -0.420690\n", - " -0.489655\n", - " -0.331950\n", + " -0.372599\n", + " -0.282710\n", + " -0.271663\n", + " -0.210773\n", + " -0.237984\n", + " -0.343494\n", + " -0.295906\n", + " -0.413551\n", + " -0.289205\n", " T001\n", " \n", " \n", @@ -1748,30 +1874,30 @@ ], "text/plain": [ " 0 1 2 3 4 5 6 \\\n", - "0 -0.235639 -0.242673 -0.328638 -0.177022 -0.352872 -0.192488 -0.331776 \n", - "1 -0.181712 -0.092614 -0.133803 -0.029308 -0.216882 -0.150235 -0.404206 \n", - "2 -0.038687 -0.141852 -0.039906 -0.071512 -0.348183 -0.208920 -0.355140 \n", - "3 -0.390387 -0.273154 -0.255869 -0.146542 -0.249707 -0.164319 -0.352804 \n", - "4 -0.366940 -0.268464 -0.258216 -0.160610 -0.378664 -0.417840 -0.490654 \n", + "0 -0.261261 -0.237069 -0.208701 -0.141066 -0.323288 -0.259694 -0.421988 \n", + "1 -0.533490 -0.478056 -0.490888 -0.385580 -0.510372 -0.443400 -0.554384 \n", + "2 -0.414414 -0.359718 -0.346267 -0.297806 -0.447358 -0.360360 -0.484866 \n", + "3 -0.402664 -0.396160 -0.428180 -0.374608 -0.499022 -0.464943 -0.567663 \n", + "4 -0.280846 -0.264107 -0.184793 -0.222962 -0.336986 -0.271837 -0.354814 \n", "\n", " 7 8 9 ... 17 18 19 20 \\\n", - "0 -1.000000 -1.000000 -1.000000 ... -0.373788 -0.227586 -0.227586 -0.319728 \n", - "1 -0.999959 -0.999958 -0.999959 ... 
-0.373708 -0.186207 -0.117241 -0.115646 \n", - "2 -0.999911 -0.999917 -0.999916 ... -0.373627 -0.048276 -0.144828 -0.047619 \n", - "3 -0.999881 -0.999883 -0.999881 ... -0.373556 -0.393103 -0.282759 -0.238095 \n", - "4 -0.999849 -0.999849 -0.999846 ... -0.373492 -0.365517 -0.282759 -0.224490 \n", + "0 -1.000000 -1.000000 -1.000000 ... -0.373977 -0.252336 -0.227166 -0.201405 \n", + "1 -0.999830 -0.999825 -0.999825 ... -0.373635 -0.525701 -0.470726 -0.482436 \n", + "2 -0.999670 -0.999656 -0.999656 ... -0.373286 -0.408879 -0.348946 -0.337237 \n", + "3 -0.999499 -0.999485 -0.999494 ... -0.372968 -0.397196 -0.388759 -0.421546 \n", + "4 -0.999296 -0.999295 -0.999301 ... -0.372599 -0.282710 -0.271663 -0.210773 \n", "\n", " 21 22 23 24 25 turbine_id \n", - "0 -0.186207 -0.369863 -0.186207 -0.337931 -0.261411 T001 \n", - "1 -0.062069 -0.232877 -0.144828 -0.406897 -0.170124 T001 \n", - "2 -0.075862 -0.356164 -0.227586 -0.365517 -0.165975 T001 \n", - "3 -0.131034 -0.260274 -0.158621 -0.365517 -0.259336 T001 \n", - "4 -0.172414 -0.397260 -0.420690 -0.489655 -0.331950 T001 \n", + "0 -0.134818 -0.322392 -0.253801 -0.418224 -0.256975 T001 \n", + "1 -0.378664 -0.507620 -0.440936 -0.553738 -0.480853 T001 \n", + "2 -0.294256 -0.439625 -0.354386 -0.483645 -0.381043 T001 \n", + "3 -0.371630 -0.495897 -0.461988 -0.565421 -0.442731 T001 \n", + "4 -0.237984 -0.343494 -0.295906 -0.413551 -0.289205 T001 \n", "\n", "[5 rows x 27 columns]" ] }, - "execution_count": 29, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } @@ -1793,17 +1919,17 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": 33, "metadata": {}, "outputs": [], "source": [ - "step = 7\n", + "step = 8\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 31, + "execution_count": 34, "metadata": {}, "outputs": [ { @@ -1812,7 +1938,7 @@ "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" ] }, - "execution_count": 31, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } @@ -1823,7 +1949,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": 35, "metadata": {}, "outputs": [ { @@ -1873,123 +1999,123 @@ " \n", " \n", " 0\n", - " -0.235639\n", - " -0.242673\n", - " -0.328638\n", - " -0.177022\n", - " -0.352872\n", - " -0.192488\n", - " -0.331776\n", + " -0.261261\n", + " -0.237069\n", + " -0.208701\n", + " -0.141066\n", + " -0.323288\n", + " -0.259694\n", + " -0.421988\n", " -1.000000\n", " -1.000000\n", " -1.000000\n", " ...\n", - " -0.227586\n", - " -0.227586\n", - " -0.319728\n", - " -0.186207\n", - " -0.369863\n", - " -0.186207\n", - " -0.337931\n", - " -0.261411\n", + " -0.252336\n", + " -0.227166\n", + " -0.201405\n", + " -0.134818\n", + " -0.322392\n", + " -0.253801\n", + " -0.418224\n", + " -0.256975\n", " T001\n", " 2013-01-10 00:00:00\n", " \n", " \n", " 1\n", - " -0.181712\n", - " -0.092614\n", - " -0.133803\n", - " -0.029308\n", - " -0.216882\n", - " -0.150235\n", - " -0.404206\n", - " -0.999959\n", - " -0.999958\n", - " -0.999959\n", + " -0.533490\n", + " -0.478056\n", + " -0.490888\n", + " -0.385580\n", + " -0.510372\n", + " -0.443400\n", + " -0.554384\n", + " -0.999830\n", + " -0.999825\n", + " -0.999825\n", " ...\n", - " -0.186207\n", - " -0.117241\n", - " -0.115646\n", - " -0.062069\n", - " -0.232877\n", - " -0.144828\n", - " -0.406897\n", - " -0.170124\n", + " -0.525701\n", + " -0.470726\n", + " -0.482436\n", + " -0.378664\n", + " -0.507620\n", + " -0.440936\n", + " 
-0.553738\n", + " -0.480853\n", " T001\n", - " 2013-01-10 00:10:00\n", + " 2013-01-10 01:00:00\n", " \n", " \n", " 2\n", - " -0.038687\n", - " -0.141852\n", - " -0.039906\n", - " -0.071512\n", - " -0.348183\n", - " -0.208920\n", - " -0.355140\n", - " -0.999911\n", - " -0.999917\n", - " -0.999916\n", + " -0.414414\n", + " -0.359718\n", + " -0.346267\n", + " -0.297806\n", + " -0.447358\n", + " -0.360360\n", + " -0.484866\n", + " -0.999670\n", + " -0.999656\n", + " -0.999656\n", " ...\n", - " -0.048276\n", - " -0.144828\n", - " -0.047619\n", - " -0.075862\n", - " -0.356164\n", - " -0.227586\n", - " -0.365517\n", - " -0.165975\n", + " -0.408879\n", + " -0.348946\n", + " -0.337237\n", + " -0.294256\n", + " -0.439625\n", + " -0.354386\n", + " -0.483645\n", + " -0.381043\n", " T001\n", - " 2013-01-10 00:20:00\n", + " 2013-01-10 02:00:00\n", " \n", " \n", " 3\n", - " -0.390387\n", - " -0.273154\n", - " -0.255869\n", - " -0.146542\n", - " -0.249707\n", - " -0.164319\n", - " -0.352804\n", - " -0.999881\n", - " -0.999883\n", - " -0.999881\n", + " -0.402664\n", + " -0.396160\n", + " -0.428180\n", + " -0.374608\n", + " -0.499022\n", + " -0.464943\n", + " -0.567663\n", + " -0.999499\n", + " -0.999485\n", + " -0.999494\n", " ...\n", - " -0.393103\n", - " -0.282759\n", - " -0.238095\n", - " -0.131034\n", - " -0.260274\n", - " -0.158621\n", - " -0.365517\n", - " -0.259336\n", + " -0.397196\n", + " -0.388759\n", + " -0.421546\n", + " -0.371630\n", + " -0.495897\n", + " -0.461988\n", + " -0.565421\n", + " -0.442731\n", " T001\n", - " 2013-01-10 00:30:00\n", + " 2013-01-10 03:00:00\n", " \n", " \n", " 4\n", - " -0.366940\n", - " -0.268464\n", - " -0.258216\n", - " -0.160610\n", - " -0.378664\n", - " -0.417840\n", - " -0.490654\n", - " -0.999849\n", - " -0.999849\n", - " -0.999846\n", + " -0.280846\n", + " -0.264107\n", + " -0.184793\n", + " -0.222962\n", + " -0.336986\n", + " -0.271837\n", + " -0.354814\n", + " -0.999296\n", + " -0.999295\n", + " -0.999301\n", " ...\n", - " -0.365517\n", - " -0.282759\n", - " -0.224490\n", - " -0.172414\n", - " -0.397260\n", - " -0.420690\n", - " -0.489655\n", - " -0.331950\n", + " -0.282710\n", + " -0.271663\n", + " -0.210773\n", + " -0.237984\n", + " -0.343494\n", + " -0.295906\n", + " -0.413551\n", + " -0.289205\n", " T001\n", - " 2013-01-10 00:40:00\n", + " 2013-01-10 04:00:00\n", " \n", " \n", "\n", @@ -1998,30 +2124,30 @@ ], "text/plain": [ " 0 1 2 3 4 5 6 \\\n", - "0 -0.235639 -0.242673 -0.328638 -0.177022 -0.352872 -0.192488 -0.331776 \n", - "1 -0.181712 -0.092614 -0.133803 -0.029308 -0.216882 -0.150235 -0.404206 \n", - "2 -0.038687 -0.141852 -0.039906 -0.071512 -0.348183 -0.208920 -0.355140 \n", - "3 -0.390387 -0.273154 -0.255869 -0.146542 -0.249707 -0.164319 -0.352804 \n", - "4 -0.366940 -0.268464 -0.258216 -0.160610 -0.378664 -0.417840 -0.490654 \n", + "0 -0.261261 -0.237069 -0.208701 -0.141066 -0.323288 -0.259694 -0.421988 \n", + "1 -0.533490 -0.478056 -0.490888 -0.385580 -0.510372 -0.443400 -0.554384 \n", + "2 -0.414414 -0.359718 -0.346267 -0.297806 -0.447358 -0.360360 -0.484866 \n", + "3 -0.402664 -0.396160 -0.428180 -0.374608 -0.499022 -0.464943 -0.567663 \n", + "4 -0.280846 -0.264107 -0.184793 -0.222962 -0.336986 -0.271837 -0.354814 \n", "\n", " 7 8 9 ... 18 19 20 21 \\\n", - "0 -1.000000 -1.000000 -1.000000 ... -0.227586 -0.227586 -0.319728 -0.186207 \n", - "1 -0.999959 -0.999958 -0.999959 ... -0.186207 -0.117241 -0.115646 -0.062069 \n", - "2 -0.999911 -0.999917 -0.999916 ... -0.048276 -0.144828 -0.047619 -0.075862 \n", - "3 -0.999881 -0.999883 -0.999881 ... 
-0.393103 -0.282759 -0.238095 -0.131034 \n", - "4 -0.999849 -0.999849 -0.999846 ... -0.365517 -0.282759 -0.224490 -0.172414 \n", + "0 -1.000000 -1.000000 -1.000000 ... -0.252336 -0.227166 -0.201405 -0.134818 \n", + "1 -0.999830 -0.999825 -0.999825 ... -0.525701 -0.470726 -0.482436 -0.378664 \n", + "2 -0.999670 -0.999656 -0.999656 ... -0.408879 -0.348946 -0.337237 -0.294256 \n", + "3 -0.999499 -0.999485 -0.999494 ... -0.397196 -0.388759 -0.421546 -0.371630 \n", + "4 -0.999296 -0.999295 -0.999301 ... -0.282710 -0.271663 -0.210773 -0.237984 \n", "\n", " 22 23 24 25 turbine_id timestamp \n", - "0 -0.369863 -0.186207 -0.337931 -0.261411 T001 2013-01-10 00:00:00 \n", - "1 -0.232877 -0.144828 -0.406897 -0.170124 T001 2013-01-10 00:10:00 \n", - "2 -0.356164 -0.227586 -0.365517 -0.165975 T001 2013-01-10 00:20:00 \n", - "3 -0.260274 -0.158621 -0.365517 -0.259336 T001 2013-01-10 00:30:00 \n", - "4 -0.397260 -0.420690 -0.489655 -0.331950 T001 2013-01-10 00:40:00 \n", + "0 -0.322392 -0.253801 -0.418224 -0.256975 T001 2013-01-10 00:00:00 \n", + "1 -0.507620 -0.440936 -0.553738 -0.480853 T001 2013-01-10 01:00:00 \n", + "2 -0.439625 -0.354386 -0.483645 -0.381043 T001 2013-01-10 02:00:00 \n", + "3 -0.495897 -0.461988 -0.565421 -0.442731 T001 2013-01-10 03:00:00 \n", + "4 -0.343494 -0.295906 -0.413551 -0.289205 T001 2013-01-10 04:00:00 \n", "\n", "[5 rows x 28 columns]" ] }, - "execution_count": 32, + "execution_count": 35, "metadata": {}, "output_type": "execute_result" } @@ -2044,7 +2170,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 36, "metadata": {}, "outputs": [ { @@ -2053,29 +2179,29 @@ "{'window_size': 24, 'cutoff_time': 'cutoff_time', 'time_index': 'timestamp'}" ] }, - "execution_count": 33, + "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pipeline._pipeline.get_hyperparameters()[\n", - " 'mlprimitives.custom.timeseries_preprocessing.cutoff_window_sequences#1']" + " 'mlstars.custom.timeseries_preprocessing.cutoff_window_sequences#1']" ] }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 37, "metadata": {}, "outputs": [], "source": [ - "step = 8\n", + "step = 9\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 38, "metadata": {}, "outputs": [ { @@ -2084,7 +2210,7 @@ "dict_keys(['readings', 'turbines', 'turbine_id', 'timestamp', 'X', 'y'])" ] }, - "execution_count": 35, + "execution_count": 38, "metadata": {}, "output_type": "execute_result" } @@ -2095,16 +2221,16 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(51121, 28)" + "(8521, 28)" ] }, - "execution_count": 36, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } @@ -2115,7 +2241,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 40, "metadata": {}, "outputs": [ { @@ -2124,7 +2250,7 @@ "(353,)" ] }, - "execution_count": 37, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } @@ -2135,7 +2261,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 41, "metadata": {}, "outputs": [ { @@ -2144,7 +2270,7 @@ "(353, 24, 26)" ] }, - "execution_count": 38, + "execution_count": 41, "metadata": {}, "output_type": "execute_result" } @@ -2155,7 +2281,7 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 42, "metadata": { "scrolled": false }, @@ -2163,27 +2289,27 @@ { 
"data": { "text/plain": [ - "array([[-0.66002345, -0.57327081, -0.64084507, -0.57796014, -0.6014068 ,\n", - " -0.56103286, -0.55140187, -0.9928135 , -0.99291267, -0.99315058,\n", - " -0.99304288, -0.99346346, -0.99352632, -0.99395333, -0.42553191,\n", - " -0.41772152, -0.58730159, -0.35996294, -0.66896552, -0.57241379,\n", - " -0.61904762, -0.5862069 , -0.60273973, -0.55862069, -0.55862069,\n", - " -0.59751037],\n", - " [-0.2989449 , -0.38569754, -0.48591549, -0.47713951, -0.66705744,\n", - " -0.5915493 , -0.77336449, -0.99278389, -0.9928852 , -0.99312701,\n", - " -0.99301988, -0.9934481 , -0.9935075 , -0.9939459 , -0.39361702,\n", - " -0.40506329, -0.54285714, -0.35992014, -0.40689655, -0.42068966,\n", - " -0.46938776, -0.48965517, -0.67123288, -0.5862069 , -0.83448276,\n", - " -0.5560166 ],\n", - " [-0.33645955, -0.40679953, -0.39906103, -0.38569754, -0.56154748,\n", - " -0.43192488, -0.45560748, -0.99275498, -0.9928584 , -0.99310017,\n", - " -0.99299431, -0.99342739, -0.99348349, -0.99392294, -0.29787234,\n", - " -0.3164557 , -0.49206349, -0.35986854, -0.42068966, -0.43448276,\n", - " -0.40136054, -0.43448276, -0.56164384, -0.47586207, -0.51724138,\n", - " -0.46473029]])" + "array([[-0.58793576, -0.60305643, -0.63981971, -0.61481191, -0.69823875,\n", + " -0.65021543, -0.68912322, -0.99436914, -0.99439755, -0.99454249,\n", + " -0.99446788, -0.99476185, -0.99490997, -0.99529511, -0.34701493,\n", + " -0.33886256, -0.33860532, -0.36301186, -0.57943925, -0.59250585,\n", + " -0.6323185 , -0.60609613, -0.69284877, -0.64444444, -0.68691589,\n", + " -0.63853752],\n", + " [-0.56600078, -0.5846395 , -0.63002156, -0.61559561, -0.70880626,\n", + " -0.66392479, -0.69732474, -0.9942427 , -0.99427986, -0.9944408 ,\n", + " -0.99436498, -0.99468147, -0.99482011, -0.99521249, -0.33955224,\n", + " -0.31516588, -0.38892883, -0.36280656, -0.55841121, -0.57611241,\n", + " -0.62295082, -0.61078546, -0.70222743, -0.65847953, -0.69392523,\n", + " -0.63645815],\n", + " [-0.64081473, -0.64184953, -0.67038997, -0.63597179, -0.71350294,\n", + " -0.65844105, -0.66764304, -0.99412236, -0.99416864, -0.99434228,\n", + " -0.99426059, -0.99459663, -0.99472365, -0.99511795, -0.34328358,\n", + " -0.30094787, -0.36304817, -0.36259859, -0.63317757, -0.6323185 ,\n", + " -0.66042155, -0.62954279, -0.70926143, -0.65380117, -0.66588785,\n", + " -0.66002426]])" ] }, - "execution_count": 39, + "execution_count": 42, "metadata": {}, "output_type": "execute_result" } @@ -2205,22 +2331,21 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 43, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "2022-01-18 07:34:41.001707: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations: AVX2 FMA\n", - "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n", - "2022-01-18 07:34:41.024991: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fac7ea34260 initialized for platform Host (this does not guarantee that XLA will be used). 
Devices:\n", - "2022-01-18 07:34:41.025038: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version\n" + "2023-04-13 18:22:35.422060: I tensorflow/core/platform/cpu_feature_guard.cc:143] Your CPU supports instructions that this TensorFlow binary was not compiled to use: AVX2 FMA\n", + "2023-04-13 18:22:35.434959: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fbf6980d6b0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n", + "2023-04-13 18:22:35.434974: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version\n" ] } ], "source": [ - "step = 9\n", + "step = 10\n", "context = pipeline.fit(**context, output_=step, start_=step)" ] } @@ -2241,7 +2366,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.11" + "version": "3.8.16" } }, "nbformat": 4, From c7ce06771980fe5a5516fc9c9d5e759984b03677 Mon Sep 17 00:00:00 2001 From: Sarah Alnegheimish <40212131+sarahmish@users.noreply.github.com> Date: Thu, 20 Jul 2023 18:13:12 -0400 Subject: [PATCH 165/171] Update demo bucket (#76) * update bucket * update macos version --- .github/workflows/tests.yml | 2 +- draco/demo.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 17d140f..69cf892 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -70,7 +70,7 @@ jobs: strategy: matrix: python-version: [3.6, 3.7, 3.8] - os: [ubuntu-20.04, macos-10.15] + os: [ubuntu-20.04, macos-latest] steps: - uses: actions/checkout@v1 - name: Set up Python ${{ matrix.python-version }} diff --git a/draco/demo.py b/draco/demo.py index dcd4126..ef91fc5 100644 --- a/draco/demo.py +++ b/draco/demo.py @@ -7,7 +7,7 @@ LOGGER = logging.getLogger(__name__) -S3_URL = 'https://d3-ai-greenguard.s3.amazonaws.com/' +S3_URL = 'https://sintel-draco.s3.amazonaws.com/' DEMO_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'demo') _FILES = { @@ -46,7 +46,7 @@ def load_demo(name='default', load_readings=True): Subsequent calls will load the cached data instead of downloading it again. Args: - rul (str): + name (str): Name of the dataset to load. If "RUL", load NASA's CMAPSS dataset https://ti.arc.nasa.gov/tech/dash/groups/pcoe/prognostic-data-repository/#turbofan. If "default" then load default demo. From 4fcdccf22cb5980f119eba52eb220c34565d772f Mon Sep 17 00:00:00 2001 From: Sarah Alnegheimish <40212131+sarahmish@users.noreply.github.com> Date: Mon, 24 Jul 2023 08:11:59 -0400 Subject: [PATCH 166/171] Release 0.3 (#77) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Bump version: 0.2.1.dev0 → 0.2.1.dev1 * add release notes * update history --- HISTORY.md | 9 +++++++++ draco/__init__.py | 2 +- setup.cfg | 2 +- setup.py | 2 +- 4 files changed, 12 insertions(+), 3 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index 539ca0e..a1cef30 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,14 @@ # History +## 0.3.0 - 2022-07-20 + +This release switches from ``MLPrimitives`` to ``ml-stars``. +Moreover, we remove all pipelines using deep feature synthesis. 
+
+* Update demo bucket - [Issue #76](https://github.com/sintel-dev/Draco/issues/76) by @sarahmish
+* Remove ``dfs`` based pipelines - [Issue #73](https://github.com/sintel-dev/Draco/issues/73) by @sarahmish
+* Move from ``MLPrimitives`` to ``ml-stars`` - [Issue #72](https://github.com/sintel-dev/Draco/issues/72) by @sarahmish
+
 ## 0.2.0 - 2022-04-12
 
diff --git a/draco/__init__.py b/draco/__init__.py
index e134da2..b54bb16 100644
--- a/draco/__init__.py
+++ b/draco/__init__.py
@@ -4,7 +4,7 @@
 __author__ = """MIT Data To AI Lab"""
 __email__ = 'dailabmit@gmail.com'
-__version__ = '0.2.1.dev0'
+__version__ = '0.2.1.dev1'
 
 import os
 
diff --git a/setup.cfg b/setup.cfg
index e78faaa..97a6f92 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.2.1.dev0
+current_version = 0.2.1.dev1
 commit = True
 tag = True
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?
diff --git a/setup.py b/setup.py
index 9087746..dba269e 100644
--- a/setup.py
+++ b/setup.py
@@ -118,6 +118,6 @@
     test_suite='tests',
     tests_require=tests_require,
     url='https://github.com/sintel-dev/Draco',
-    version='0.2.1.dev0',
+    version='0.2.1.dev1',
     zip_safe=False,
 )

From 7b00501a8acb66197715b8a9125b54cff98aec99 Mon Sep 17 00:00:00 2001
From: Sarah Alnegheimish
Date: Mon, 31 Jul 2023 17:34:42 +0300
Subject: =?UTF-8?q?Bump=20version:=200.2.1.dev1=20?=
 =?UTF-8?q?=E2=86=92=200.2.1.dev2?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 draco/__init__.py | 2 +-
 setup.cfg         | 2 +-
 setup.py          | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/draco/__init__.py b/draco/__init__.py
index b54bb16..2b4fb3b 100644
--- a/draco/__init__.py
+++ b/draco/__init__.py
@@ -4,7 +4,7 @@
 __author__ = """MIT Data To AI Lab"""
 __email__ = 'dailabmit@gmail.com'
-__version__ = '0.2.1.dev1'
+__version__ = '0.2.1.dev2'
 
 import os
 
diff --git a/setup.cfg b/setup.cfg
index 97a6f92..7c17e7e 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.2.1.dev1
+current_version = 0.2.1.dev2
 commit = True
 tag = True
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?
diff --git a/setup.py b/setup.py
index dba269e..a229a7d 100644
--- a/setup.py
+++ b/setup.py
@@ -118,6 +118,6 @@
     test_suite='tests',
     tests_require=tests_require,
     url='https://github.com/sintel-dev/Draco',
-    version='0.2.1.dev1',
+    version='0.2.1.dev2',
     zip_safe=False,
 )

From 3fa39d7a4103f247ccd76c68f5476c017db017b1 Mon Sep 17 00:00:00 2001
From: Sarah Alnegheimish
Date: Mon, 31 Jul 2023 18:34:55 +0300
Subject: [PATCH 168/171] prepare release

---
 HISTORY.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/HISTORY.md b/HISTORY.md
index a1cef30..ebd216a 100644
--- a/HISTORY.md
+++ b/HISTORY.md
@@ -1,6 +1,6 @@
 # History
 
-## 0.3.0 - 2022-07-20
+## 0.3.0 - 2022-07-31
 
 This release switches from ``MLPrimitives`` to ``ml-stars``.
 Moreover, we remove all pipelines using deep feature synthesis.
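
Note on the demo loader patched in #76 above: the diff points S3_URL at the new sintel-draco bucket and documents the `name` and `load_readings` arguments. The following is a minimal usage sketch, assuming draco 0.3 or later is installed; the lowercase 'rul' key and the targets-only return value for load_readings=False are inferences from the diff, not verified behavior of the released API.

    # Minimal sketch, assuming draco >= 0.3 with the patched demo module.
    from draco.demo import load_demo

    # Default demo: the first call downloads target_times and readings
    # from the sintel-draco S3 bucket and caches them under draco/demo/;
    # subsequent calls read the cached copies instead.
    target_times, readings = load_demo()

    # NASA CMAPSS (RUL) variant named in the docstring; the exact key
    # casing and the targets-only return shape are assumptions here.
    rul_target_times = load_demo(name='rul', load_readings=False)
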
From 122ee46f9a2376bb4f009f593f2c4d031a5f3bb5 Mon Sep 17 00:00:00 2001
From: Sarah Alnegheimish
Date: Mon, 31 Jul 2023 18:35:21 +0300
Subject: =?UTF-8?q?Bump=20version:=200.2.1.dev2=20?=
 =?UTF-8?q?=E2=86=92=200.3.0.dev0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 draco/__init__.py | 2 +-
 setup.cfg         | 2 +-
 setup.py          | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/draco/__init__.py b/draco/__init__.py
index 2b4fb3b..8d60420 100644
--- a/draco/__init__.py
+++ b/draco/__init__.py
@@ -4,7 +4,7 @@
 __author__ = """MIT Data To AI Lab"""
 __email__ = 'dailabmit@gmail.com'
-__version__ = '0.2.1.dev2'
+__version__ = '0.3.0.dev0'
 
 import os
 
diff --git a/setup.cfg b/setup.cfg
index 7c17e7e..b8b1363 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.2.1.dev2
+current_version = 0.3.0.dev0
 commit = True
 tag = True
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?
diff --git a/setup.py b/setup.py
index a229a7d..48f9ac6 100644
--- a/setup.py
+++ b/setup.py
@@ -118,6 +118,6 @@
     test_suite='tests',
     tests_require=tests_require,
     url='https://github.com/sintel-dev/Draco',
-    version='0.2.1.dev2',
+    version='0.3.0.dev0',
     zip_safe=False,
 )

From a574fe24543ee8f9cdb929407fc94432894651a7 Mon Sep 17 00:00:00 2001
From: Sarah Alnegheimish
Date: Mon, 31 Jul 2023 18:35:22 +0300
Subject: =?UTF-8?q?Bump=20version:=200.3.0.dev0=20?=
 =?UTF-8?q?=E2=86=92=200.3.0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 draco/__init__.py | 2 +-
 setup.cfg         | 2 +-
 setup.py          | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/draco/__init__.py b/draco/__init__.py
index 8d60420..b042dce 100644
--- a/draco/__init__.py
+++ b/draco/__init__.py
@@ -4,7 +4,7 @@
 __author__ = """MIT Data To AI Lab"""
 __email__ = 'dailabmit@gmail.com'
-__version__ = '0.3.0.dev0'
+__version__ = '0.3.0'
 
 import os
 
diff --git a/setup.cfg b/setup.cfg
index b8b1363..6445146 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.3.0.dev0
+current_version = 0.3.0
 commit = True
 tag = True
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?
diff --git a/setup.py b/setup.py
index 48f9ac6..569e9ae 100644
--- a/setup.py
+++ b/setup.py
@@ -118,6 +118,6 @@
     test_suite='tests',
     tests_require=tests_require,
     url='https://github.com/sintel-dev/Draco',
-    version='0.3.0.dev0',
+    version='0.3.0',
     zip_safe=False,
 )

From 113e14fddb3b31570537aaf011b0e95255511855 Mon Sep 17 00:00:00 2001
From: Sarah Alnegheimish
Date: Mon, 31 Jul 2023 18:36:01 +0300
Subject: =?UTF-8?q?Bump=20version:=200.3.0=20=E2=86=92=200?=
 =?UTF-8?q?.3.1.dev0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 draco/__init__.py | 2 +-
 setup.cfg         | 2 +-
 setup.py          | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/draco/__init__.py b/draco/__init__.py
index b042dce..9b2e05b 100644
--- a/draco/__init__.py
+++ b/draco/__init__.py
@@ -4,7 +4,7 @@
 __author__ = """MIT Data To AI Lab"""
 __email__ = 'dailabmit@gmail.com'
-__version__ = '0.3.0'
+__version__ = '0.3.1.dev0'
 
 import os
 
diff --git a/setup.cfg b/setup.cfg
index 6445146..e595904 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [bumpversion]
-current_version = 0.3.0
+current_version = 0.3.1.dev0
 commit = True
 tag = True
 parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\.(?P<release>[a-z]+)(?P<candidate>\d+))?
diff --git a/setup.py b/setup.py index 569e9ae..5d475a5 100644 --- a/setup.py +++ b/setup.py @@ -118,6 +118,6 @@ test_suite='tests', tests_require=tests_require, url='https://github.com/sintel-dev/Draco', - version='0.3.0', + version='0.3.1.dev0', zip_safe=False, )
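
The [bumpversion] section that each of these release commits rewrites keeps the same `parse` pattern as a context line in every setup.cfg hunk. Purely as an illustration (this snippet belongs to no patch), the regex can be exercised on the two version shapes that appear in the series:

    # Illustration only: the bumpversion `parse` regex from setup.cfg,
    # applied to the final-release and dev-build version strings above.
    import re

    PARSE = re.compile(
        r'(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)'
        r'(\.(?P<release>[a-z]+)(?P<candidate>\d+))?'
    )

    for version in ('0.3.0', '0.3.1.dev0'):
        print(version, PARSE.match(version).groupdict())
    # 0.3.0      -> release and candidate are None (a final release)
    # 0.3.1.dev0 -> release='dev', candidate='0' (a development build)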