Commit 57d1267

make release-tag: Merge branch 'master' into stable

2 parents 76e7b73 + 49d69e6

63 files changed: +10358 −7585 lines

.github/workflows/tests.yml (+23 −6)

```diff
@@ -7,11 +7,28 @@ on:
     branches: [ master ]
 
 jobs:
+  docs:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        python-version: [3.8]
+        os: [ubuntu-latest]
+    steps:
+    - uses: actions/checkout@v1
+    - name: Set up Python ${{ matrix.python-version }}
+      uses: actions/setup-python@v1
+      with:
+        python-version: ${{ matrix.python-version }}
+    - name: Install package
+      run: python -m pip install .[dev]
+    - name: make docs
+      run: make docs
+
   lint:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        python-version: [3.7, 3.8]
+        python-version: [3.6, 3.7, 3.8]
         os: [ubuntu-latest]
     steps:
     - uses: actions/checkout@v1
@@ -30,7 +47,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        python-version: [3.7, 3.8]
+        python-version: [3.6, 3.7, 3.8]
         os: [ubuntu-latest]
     steps:
     - uses: actions/checkout@v1
@@ -52,8 +69,8 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        python-version: [3.7, 3.8]
-        os: [ubuntu-latest, macos-latest]
+        python-version: [3.6, 3.7, 3.8]
+        os: [ubuntu-latest, macos-10.15]
     steps:
     - uses: actions/checkout@v1
     - name: Set up Python ${{ matrix.python-version }}
@@ -71,7 +88,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        python-version: [3.7, 3.8]
+        python-version: [3.6, 3.7, 3.8]
         os: [ubuntu-latest]
     steps:
     - uses: actions/checkout@v1
@@ -90,7 +107,7 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        python-version: [3.7, 3.8]
+        python-version: [3.6, 3.7, 3.8]
         os: [ubuntu-latest]
     steps:
     - uses: actions/checkout@v1
```
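The widened matrices above mean each job now runs once per (python-version, os) combination. A minimal sketch of how such a build matrix expands into concrete job configurations (`expand_matrix` is an illustrative helper, not part of the repository or of GitHub Actions):

```python
from itertools import product

def expand_matrix(matrix):
    """Expand a GitHub Actions-style build matrix into one dict per job."""
    keys = list(matrix)
    return [dict(zip(keys, values)) for values in product(*matrix.values())]

# The unit-test matrix from the workflow above:
jobs = expand_matrix({
    'python-version': ['3.6', '3.7', '3.8'],
    'os': ['ubuntu-latest', 'macos-10.15'],
})
print(len(jobs))  # 3 Python versions x 2 OSes = 6 jobs
```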

.gitignore (+3)

```diff
@@ -112,3 +112,6 @@ notebooks-private/
 scripts/
 dask-worker-space/
 tutorials/*.pkl
+
+*.pkl
+*.DS_Store
```

HISTORY.md (+12 −1)

```diff
@@ -1,9 +1,20 @@
 # History
 
-## 0.1.0 - 2021-01-01
+
+## 0.2.0 - 2022-04-12
+
+This release features a reorganization and renaming of ``Draco`` pipelines. In addition,
+we update some of the dependencies for general housekeeping.
+
+* Update Draco dependencies - [Issue #66](https://github.com/signals-dev/Draco/issues/66) by @sarahmish
+* Reorganize pipelines - [Issue #63](https://github.com/signals-dev/Draco/issues/63) by @sarahmish
+
+
+## 0.1.0 - 2022-01-01
 
 * First release on ``draco-ml`` PyPI
 
+
 ## Previous GreenGuard development
 
 ### 0.3.0 - 2021-01-22
```

Makefile (+7 −1)

```diff
@@ -256,7 +256,7 @@ check-release: check-candidate check-clean check-master check-history ## Check i
 	@echo "A new release can be made"
 
 .PHONY: release
-release: check-release bumpversion-release docker-push publish bumpversion-patch
+release: check-release bumpversion-release publish bumpversion-patch
 
 .PHONY: release-test
 release-test: check-release bumpversion-release-test publish-test bumpversion-revert
@@ -267,6 +267,12 @@ release-candidate: check-master publish bumpversion-candidate
 .PHONY: release-candidate-test
 release-candidate-test: check-clean check-master publish-test
 
+.PHONY: release-minor
+release-minor: check-release bumpversion-minor release
+
+.PHONY: release-major
+release-major: check-release bumpversion-major release
+
 
 # DOCKER TARGETS
 
```
README.md (+7 −7)

````diff
@@ -220,18 +220,18 @@ The returned `pipeline` variable will be `list` containing the names of all the
 available in the Draco system:
 
 ```
-['classes.unstack_double_lstm_timeseries_classifier',
- 'classes.unstack_lstm_timeseries_classifier',
- 'classes.unstack_normalize_dfs_xgb_classifier',
- 'classes.unstack_dfs_xgb_classifier',
- 'classes.normalize_dfs_xgb_classifier']
+['dfs_xgb',
+ 'dfs_xgb_with_unstack',
+ 'dfs_xgb_with_normalization',
+ 'dfs_xgb_with_unstack_normalization',
+ 'dfs_xgb_prob_with_unstack_normalization']
 ```
 
 For the rest of this tutorial, we will select and use the pipeline
-`classes.normalize_dfs_xgb_classifier` as our template.
+`dfs_xgb_with_unstack_normalization` as our template.
 
 ```python3
-pipeline_name = 'classes.normalize_dfs_xgb_classifier'
+pipeline_name = 'dfs_xgb_with_unstack_normalization'
 ```
 
 ## 3. Fitting the Pipeline
````

docker/Dockerfile (+1 −1)

```diff
@@ -1,4 +1,4 @@
-FROM python:3.6
+FROM python:3.7
 
 ARG UID=1000
 EXPOSE 8888
```

draco/__init__.py (+5 −3)

```diff
@@ -4,16 +4,18 @@
 
 __author__ = """MIT Data To AI Lab"""
 __email__ = 'dailabmit@gmail.com'
-__version__ = '0.1.0'
+__version__ = '0.2.0.dev0'
 
 import os
 
 from draco.pipeline import DracoPipeline, get_pipelines
 
 _BASE_PATH = os.path.abspath(os.path.dirname(__file__))
-MLBLOCKS_PIPELINES = os.path.join(_BASE_PATH, 'pipelines')
 MLBLOCKS_PRIMITIVES = os.path.join(_BASE_PATH, 'primitives')
-
+MLBLOCKS_PIPELINES = tuple(
+    dirname
+    for dirname, _, _ in os.walk(os.path.join(_BASE_PATH, 'pipelines'))
+)
 
 __all__ = (
     'DracoPipeline',
```
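After this change `MLBLOCKS_PIPELINES` is no longer a single path but a tuple of every directory under `pipelines/`, so JSON pipelines in nested category folders can be discovered. A self-contained sketch of the same `os.walk` pattern, run against a temporary directory since the real package layout is not available here (folder names are illustrative):

```python
import os
import tempfile

def walk_pipeline_dirs(base_path):
    # Same expression as the new MLBLOCKS_PIPELINES definition:
    # every directory under base_path/pipelines, root first, via os.walk.
    return tuple(
        dirname
        for dirname, _, _ in os.walk(os.path.join(base_path, 'pipelines'))
    )

# Mimic a nested pipelines/ layout with two category subfolders.
base = tempfile.mkdtemp()
os.makedirs(os.path.join(base, 'pipelines', 'dfs_xgb'))
os.makedirs(os.path.join(base, 'pipelines', 'preprocessing'))

print(len(walk_pipeline_dirs(base)))  # 3: the pipelines/ root plus two subfolders
```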

draco/demo.py (+29 −6)

```diff
@@ -10,6 +10,17 @@
 S3_URL = 'https://d3-ai-greenguard.s3.amazonaws.com/'
 DEMO_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'demo')
 
+_FILES = {
+    'DEFAULT': [
+        ('target_times', 'cutoff_time'),
+        ('readings', 'timestamp')
+    ],
+    'RUL': [
+        ('rul_train_target_times', 'cutoff_time'),
+        ('rul_test_target_times', 'cutoff_time'),
+        ('rul_readings', 'timestamp')
+    ]
+}
 
 def _load_or_download(filename, dates):
     filename += '.csv.gz'
@@ -27,23 +38,35 @@ def _load_or_download(filename, dates):
     return data
 
 
-def load_demo(load_readings=True):
+def load_demo(name='default', load_readings=True):
     """Load the demo included in the Draco project.
 
     The first time that this function is executed, the data will be downloaded
     and cached inside the `draco/demo` folder.
     Subsequent calls will load the cached data instead of downloading it again.
+
+    Args:
+        name (str):
+            Name of the dataset to load. If "RUL", load NASA's CMAPSS dataset
+            https://ti.arc.nasa.gov/tech/dash/groups/pcoe/prognostic-data-repository/#turbofan.
+            If "default" then load default demo.
+        load_readings (bool):
+            Whether to load the ``readings`` table or not.
 
     Returns:
         tuple[pandas.DataFrame]:
             target_times and readings tables
     """
-    target_times = _load_or_download('target_times', 'cutoff_time')
-    if load_readings:
-        readings = _load_or_download('readings', 'timestamp')
-        return target_times, readings
+    files = _FILES[name.upper()]
 
-    return target_times
+    if not load_readings:
+        files = files[:-1]
+
+    output = list()
+    for filename, dates in files:
+        output.append(_load_or_download(filename, dates))
+
+    return tuple(output)
 
 
 def generate_raw_readings(output_path='demo'):
```
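The refactored `load_demo` drives everything off the `_FILES` lookup: the readings table is always the last entry in each list, so it is simply dropped when `load_readings=False`. A standalone sketch of that selection logic with a stub loader (the `loader` parameter is an illustrative addition standing in for `_load_or_download`, which downloads from S3):

```python
_FILES = {
    'DEFAULT': [
        ('target_times', 'cutoff_time'),
        ('readings', 'timestamp')
    ],
    'RUL': [
        ('rul_train_target_times', 'cutoff_time'),
        ('rul_test_target_times', 'cutoff_time'),
        ('rul_readings', 'timestamp')
    ]
}

def load_demo(name='default', load_readings=True, loader=None):
    """Mirror the new selection logic; `loader` stubs out the S3 download."""
    loader = loader or (lambda filename, dates: filename)
    files = _FILES[name.upper()]
    if not load_readings:
        files = files[:-1]  # the readings table is always listed last
    return tuple(loader(filename, dates) for filename, dates in files)

print(load_demo('rul', load_readings=False))
# ('rul_train_target_times', 'rul_test_target_times')
```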

draco/pipeline.py (+28 −16)

```diff
@@ -9,7 +9,6 @@
 from copy import deepcopy
 from hashlib import md5
 
-import cloudpickle
 import keras
 import numpy as np
 from btb import BTBSession
@@ -54,7 +53,7 @@ def __setstate__(self, state):
 Sequential.__setstate__ = __setstate__
 
 
-def get_pipelines(pattern='', path=False, pipeline_type='classes'):
+def get_pipelines(pattern='', path=False, pipeline_type=None):
     """Get the list of available pipelines.
 
     Optionally filter the names using a pattern or obtain
@@ -66,25 +65,33 @@ def get_pipelines(pattern='', path=False, pipeline_type='classes'):
         path (bool):
             Whether to return a dictionary containing the pipeline
             paths instead of only a list with the names.
-        pipeline_type (str):
-            The pipeline category to filter by (`classes`, `probability` and `unstacked`).
-            Defaults to `classes`.
+        pipeline_type (str or list[str]):
+            The pipeline category to filter. Defaults to `None`.
 
     Return:
         list or dict:
             List of available and matching pipeline names.
             If `path=True`, return a dict containing the pipeline
             names as keys and their absolute paths as values.
     """
+    if isinstance(pipeline_type, str):
+        pipeline_type = [pipeline_type]
+    elif pipeline_type is None:
+        pipeline_type = os.listdir(PIPELINES_DIR)
+
     pipelines = dict()
-    pipelines_dir = os.path.join(PIPELINES_DIR, pipeline_type)
-
-    for filename in os.listdir(pipelines_dir):
-        if filename.endswith('.json') and pattern in filename:
-            name = os.path.basename(filename)[:-len('.json')]
-            name = f'{pipeline_type}.{name}'
-            pipeline_path = os.path.join(pipelines_dir, filename)
-            pipelines[name] = pipeline_path
+    pipelines_dir = [
+        os.path.join(PIPELINES_DIR, ptype)
+        for ptype in pipeline_type
+        if ptype != 'preprocessing'
+    ]
+
+    for pdir in pipelines_dir:
+        for filename in os.listdir(pdir):
+            if filename.endswith('.json') and pattern in filename:
+                name = os.path.basename(filename)[:-len('.json')]
+                pipeline_path = os.path.join(pdir, filename)
+                pipelines[name] = pipeline_path
 
     if not path:
         pipelines = list(pipelines)
@@ -604,14 +611,14 @@ def predict(self, target_times=None, readings=None, turbines=None,
         return predictions
 
     def save(self, path):
-        """Serialize and save this pipeline using cloudpickle.
+        """Serialize and save this pipeline using pickle.
 
         Args:
             path (str):
                 Path to the file where the pipeline will be saved.
         """
         with open(path, 'wb') as pickle_file:
-            cloudpickle.dump(self, pickle_file)
+            pickle.dump(self, pickle_file)
 
     @classmethod
     def load(cls, path):
@@ -626,4 +633,9 @@ def load(cls, path):
                 Loaded DracoPipeline instance.
         """
         with open(path, 'rb') as pickle_file:
-            return cloudpickle.load(pickle_file)
+            pipeline = pickle.load(pickle_file)
+
+        if not isinstance(pipeline, cls):
+            raise ValueError('Serialized object is not a DracoPipeline')
+
+        return pipeline
```
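With `pipeline_type=None`, `get_pipelines` now scans every category folder except `preprocessing` and returns bare pipeline names, dropping the old `classes.` prefix. A self-contained sketch of that lookup run against a temporary directory tree (the root is passed explicitly here instead of the module-level `PIPELINES_DIR`, and the folder and file names are illustrative):

```python
import os
import tempfile

def get_pipelines(pipelines_root, pattern='', path=False, pipeline_type=None):
    """Sketch of the new lookup: scan category dirs, skipping 'preprocessing'."""
    if isinstance(pipeline_type, str):
        pipeline_type = [pipeline_type]
    elif pipeline_type is None:
        pipeline_type = os.listdir(pipelines_root)

    pipelines = dict()
    for ptype in pipeline_type:
        if ptype == 'preprocessing':
            continue
        pdir = os.path.join(pipelines_root, ptype)
        for filename in os.listdir(pdir):
            if filename.endswith('.json') and pattern in filename:
                name = filename[:-len('.json')]  # bare name, no category prefix
                pipelines[name] = os.path.join(pdir, filename)

    return pipelines if path else list(pipelines)

# Build a tiny tree: one real category folder plus a 'preprocessing' folder.
root = tempfile.mkdtemp()
os.makedirs(os.path.join(root, 'dfs_xgb'))
os.makedirs(os.path.join(root, 'preprocessing'))
open(os.path.join(root, 'dfs_xgb', 'dfs_xgb.json'), 'w').close()
open(os.path.join(root, 'preprocessing', 'unstack.json'), 'w').close()

print(get_pipelines(root))  # ['dfs_xgb'] -- preprocessing is excluded
```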

draco/pipelines/classes/normalize_dfs_xgb_classifier.json (−65)

This file was deleted.
