From a345e26093d4ce2485603c2b940478cf0b6979af Mon Sep 17 00:00:00 2001
From: Galileo Galilei <yolan.honore.rouge@gmail.com>
Date: Sun, 7 Feb 2021 21:36:44 +0100
Subject: [PATCH] FIX #159 - Refactor CLI to make mlflow.yml read and write
 more flexible to different configuration logic

---
 CHANGELOG.md                                  |   3 +
 docs/source/02_installation/02_setup.md       |   8 +-
 .../03_getting_started/02_first_steps.md      |   4 +-
 docs/source/07_python_objects/04_CLI.md       |   6 +-
 kedro_mlflow/framework/cli/cli.py             |  88 ++++++++-------
 .../framework/context/mlflow_context.py       |  14 ++-
 kedro_mlflow/utils.py                         |   9 --
 tests/framework/cli/test_cli.py               | 101 +++++++++++++++---
 .../framework/context/test_mlflow_context.py  |  18 +++-
 tests/test_utils.py                           |  31 ------
 10 files changed, 174 insertions(+), 108 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 57e5ea8a..58e4e1f5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,14 +5,17 @@
 ### Added
 
 - A new `long_parameters_strategy` key is added in the `mlflow.yml` (under in the hook/node section). You can specify different strategies (`fail`, `truncate` or `tag`) to handle parameters over 250 characters which cause crashes for some mlflow backend. ([#69](https://github.com/Galileo-Galilei/kedro-mlflow/issues/69))
+- Add an `env` parameter to `kedro mlflow init` command to specify under which `conf/` subfolder the `mlflow.yml` should be created. ([#159](https://github.com/Galileo-Galilei/kedro-mlflow/issues/159))
 
 ### Fixed
 
 - Pin the kedro version to force it to be **strictly** inferior to `0.17` which is not compatible with current `kedro-mlflow` version ([#143](https://github.com/Galileo-Galilei/kedro-mlflow/issues/143))
+- It is no longer assumed for the project to run that the `mlflow.yml` is located under `conf/base`. The project will run as soon as the configuration file is discovered by the registered ConfigLoader ([#159](https://github.com/Galileo-Galilei/kedro-mlflow/issues/159))
 
 ### Changed
 
 - The `KedroPipelineModel.load_context()` method now loads all the `DataSets` in memory in the `DataCatalog`. It is also now possible to specify the `runner` to execute the model as well as the `copy_mode` when executing the inference pipeline (instead of deepcopying the datasets between each nodes which is kedro's default). This makes the API serving with `mlflow serve` command considerably faster (~20 times faster) for models which needs compiling (i.e. keras, tensorflow ...) ([#133](https://github.com/Galileo-Galilei/kedro-mlflow/issues/133))
+- The CLI projects commands are now always accessible even if you have not called `kedro mlflow init` yet to create a `mlflow.yml` configuration file ([#159](https://github.com/Galileo-Galilei/kedro-mlflow/issues/159))
 
 ## [0.4.1] - 2020-12-03
 
diff --git a/docs/source/02_installation/02_setup.md b/docs/source/02_installation/02_setup.md
index 3b321c41..28a18393 100644
--- a/docs/source/02_installation/02_setup.md
+++ b/docs/source/02_installation/02_setup.md
@@ -31,7 +31,13 @@ kedro mlflow init
 you should see the following message:
 
 ```console
-'conf/base/mlflow.yml' successfully updated.
+'conf/local/mlflow.yml' successfully updated.
+```
+
+*Note: you can create the configuration file in another kedro environment with the `--env` argument:*
+
+```console
+kedro mlflow init --env=<other-environment>
 ```
 
 ### Declaring kedro-mlflow hooks
diff --git a/docs/source/03_getting_started/02_first_steps.md b/docs/source/03_getting_started/02_first_steps.md
index f35a0c71..907690c6 100644
--- a/docs/source/03_getting_started/02_first_steps.md
+++ b/docs/source/03_getting_started/02_first_steps.md
@@ -11,10 +11,10 @@ kedro mlflow init
 You will see the following message:
 
 ```console
-'conf/base/mlflow.yml' successfully updated.
+'conf/local/mlflow.yml' successfully updated.
 ```
 
-The ``conf/base`` folder is updated and you can see the `mlflow.yml` file:
+The ``conf/local`` folder is updated and you can see the `mlflow.yml` file:
 
 ![initialized_project](../imgs/initialized_project.png)
 
diff --git a/docs/source/07_python_objects/04_CLI.md b/docs/source/07_python_objects/04_CLI.md
index f344fb49..4f852075 100644
--- a/docs/source/07_python_objects/04_CLI.md
+++ b/docs/source/07_python_objects/04_CLI.md
@@ -3,9 +3,13 @@
 ## ``init``
 
  ``kedro mlflow init``: this command is needed to initalize your project. You cannot run any other commands before you run this one once. It performs 2 actions:
-    - creates a ``mlflow.yml`` configuration file in your ``conf/base`` folder
+    - creates a ``mlflow.yml`` configuration file in your ``conf/local`` folder
     - replace the ``src/PYTHON_PACKAGE/run.py`` file by an updated version of the template. If your template has been modified since project creation, a warning wil be raised. You can either run ``kedro mlflow init --force`` to ignore this warning (but this will erase your ``run.py``) or [set hooks manually](#new-hooks).
 
+Init has two arguments:
+- `--env` which enable to specifiy another environment where the mlflow.yml should be created (e.g, `base`)
+- `--force` which overrides the `mlflow.yml` if it already exists and replaces it with the default one. Use it with caution!
+
 ## ``ui``
 
 ``kedro mlflow ui``: this command opens the mlflow UI (basically launches the ``mlflow ui`` command with the configuration of your ``mlflow.yml`` file)
diff --git a/kedro_mlflow/framework/cli/cli.py b/kedro_mlflow/framework/cli/cli.py
index c4b41666..181097b1 100644
--- a/kedro_mlflow/framework/cli/cli.py
+++ b/kedro_mlflow/framework/cli/cli.py
@@ -6,7 +6,7 @@
 
 from kedro_mlflow.framework.cli.cli_utils import write_jinja_template
 from kedro_mlflow.framework.context import get_mlflow_config
-from kedro_mlflow.utils import _already_updated, _is_kedro_project
+from kedro_mlflow.utils import _is_kedro_project
 
 try:
     from kedro.framework.context import get_static_project_data
@@ -26,20 +26,14 @@ def reset_commands(self):
         # add commands on the fly based on conditions
         if _is_kedro_project():
             self.add_command(init)
-            if _already_updated():
-                self.add_command(ui)
-                # self.add_command(run) # TODO : IMPLEMENT THIS FUNCTION
+            self.add_command(ui)
+            # self.add_command(run) # TODO : IMPLEMENT THIS FUNCTION
         else:
             self.add_command(new)
 
     def list_commands(self, ctx):
         self.reset_commands()
         commands_list = sorted(self.commands)
-        if commands_list == ["init"]:
-            click.secho(
-                """\n\nYou have not updated your template yet.\nThis is mandatory to use 'kedro-mlflow' plugin.\nPlease run the following command before you can access to other commands :\n\n$ kedro mlflow init""",
-                fg="yellow",
-            )
         return commands_list
 
     def get_command(self, ctx, cmd_name):
@@ -60,12 +54,18 @@ def mlflow_commands():
 
 
 @mlflow_commands.command()
+@click.option(
+    "--env",
+    "-e",
+    default="local",
+    help="The name of the kedro environment where the 'mlflow.yml' should be created. Default to 'local'",
+)
 @click.option(
     "--force",
     "-f",
     is_flag=True,
     default=False,
-    help="Update the template without any checks. The modifications you made in 'run.py' will be lost.",
+    help="Update the template without any checks. If a 'mlflow.yml' already eists, it will be overridden.",
 )
 @click.option(
     "--silent",
@@ -74,49 +74,56 @@ def mlflow_commands():
     default=False,
     help="Should message be logged when files are modified?",
 )
-def init(force, silent):
+def init(env, force, silent):
     """Updates the template of a kedro project.
-    Running this command is mandatory to use kedro-mlflow.
-    2 actions are performed :
-        1. Add "conf/base/mlflow.yml": This is a configuration file
-         used for run parametrization when calling "kedro run" command.
-         See INSERT_DOC_URL for further details.
-        2. Modify "src/YOUR_PACKAGE_NAME/run.py" to add mlflow hooks
-         to the ProjectContext. This will erase your current "run.py"
-         script and all your modifications will be lost.
-         If you do not want to erase "run.py", insert the hooks manually
+    Running this command is mandatory to use kedro-mlflow. This
+    adds a configuration file used for run parametrization when
+    calling "kedro run" command.
     """
 
     # get constants
+    mlflow_yml = "mlflow.yml"
     project_path = Path().cwd()
     project_globals = get_static_project_data(project_path)
     context = load_context(project_path)
-    conf_root = context.CONF_ROOT
+    mlflow_yml_path = project_path / context.CONF_ROOT / env / mlflow_yml
 
-    # mlflow.yml is just a static file,
-    # but the name of the experiment is set to be the same as the project
-    mlflow_yml = "mlflow.yml"
-    write_jinja_template(
-        src=TEMPLATE_FOLDER_PATH / mlflow_yml,
-        is_cookiecutter=False,
-        dst=project_path / conf_root / "base" / mlflow_yml,
-        python_package=project_globals["package_name"],
-    )
-    if not silent:
+    # mlflow.yml is just a static file
+    if mlflow_yml_path.is_file() and not force:
         click.secho(
             click.style(
-                f"'{conf_root}/base/mlflow.yml' successfully updated.", fg="green"
+                f"A 'mlflow.yml' already exists at '{mlflow_yml_path}' You can use the ``--force`` option to override it.",
+                fg="red",
             )
         )
+        return 1
+    else:
+        try:
+            write_jinja_template(
+                src=TEMPLATE_FOLDER_PATH / mlflow_yml,
+                is_cookiecutter=False,
+                dst=mlflow_yml_path,
+                python_package=project_globals["package_name"],
+            )
+        except FileNotFoundError:
+            click.secho(
+                click.style(
+                    f"No env '{env}' found. Please check this folder exists inside '{context.CONF_ROOT}' folder.",
+                    fg="red",
+                )
+            )
+            return 1
+        if not silent:
+            click.secho(
+                click.style(
+                    f"'{context.CONF_ROOT}/{env}/{mlflow_yml}' successfully updated.",
+                    fg="green",
+                )
+            )
+        return 0
 
 
 @mlflow_commands.command()
-@click.option(
-    "--project-path",
-    "-p",
-    required=False,
-    help="The environment within conf folder we want to retrieve.",
-)
 @click.option(
     "--env",
     "-e",
@@ -124,15 +131,14 @@ def init(force, silent):
     default="local",
     help="The environment within conf folder we want to retrieve.",
 )
-def ui(project_path, env):
+def ui(env):
     """Opens the mlflow user interface with the
     project-specific settings of mlflow.yml. This interface
     enables to browse and compares runs.
 
     """
 
-    if not project_path:
-        project_path = Path().cwd()
+    project_path = Path().cwd()
     context = load_context(project_path=project_path, env=env)
     # the context must contains the self.mlflow attribues with mlflow configuration
     mlflow_conf = get_mlflow_config(context)
diff --git a/kedro_mlflow/framework/context/mlflow_context.py b/kedro_mlflow/framework/context/mlflow_context.py
index 8ed91bae..50824afc 100644
--- a/kedro_mlflow/framework/context/mlflow_context.py
+++ b/kedro_mlflow/framework/context/mlflow_context.py
@@ -1,14 +1,22 @@
+from kedro.config import MissingConfigException
 from kedro.framework.context import KedroContext
 
-from kedro_mlflow.framework.context.config import KedroMlflowConfig
+from kedro_mlflow.framework.context.config import (
+    KedroMlflowConfig,
+    KedroMlflowConfigError,
+)
 
 
 # this could be a read-only property in the context
 # with a @property decorator
 # but for consistency with hook system, it is an external function
 def get_mlflow_config(context: KedroContext):
-
-    conf_mlflow_yml = context.config_loader.get("mlflow*", "mlflow*/**")
+    try:
+        conf_mlflow_yml = context.config_loader.get("mlflow*", "mlflow*/**")
+    except MissingConfigException:
+        raise KedroMlflowConfigError(
+            "No 'mlflow.yml' config file found in environment. Use ``kedro mlflow init`` command in CLI to create a default config file."
+        )
     conf_mlflow = KedroMlflowConfig(context.project_path)
     conf_mlflow.from_dict(conf_mlflow_yml)
     return conf_mlflow
diff --git a/kedro_mlflow/utils.py b/kedro_mlflow/utils.py
index 980c95a8..bdff5881 100644
--- a/kedro_mlflow/utils.py
+++ b/kedro_mlflow/utils.py
@@ -60,15 +60,6 @@ def _validate_project_path(project_path: Union[str, Path, None] = None) -> Path:
     return project_path
 
 
-def _already_updated(project_path: Union[str, Path, None] = None) -> bool:
-    project_path = _validate_project_path(project_path)
-    flag = False
-    # TODO : add a better check ...
-    if (project_path / "conf" / "base" / "mlflow.yml").is_file():
-        flag = True
-    return flag
-
-
 def _parse_requirements(path, encoding="utf-8"):
     with open(path, mode="r", encoding=encoding) as file_handler:
         requirements = [
diff --git a/tests/framework/cli/test_cli.py b/tests/framework/cli/test_cli.py
index 4080c03d..8da4c281 100644
--- a/tests/framework/cli/test_cli.py
+++ b/tests/framework/cli/test_cli.py
@@ -1,15 +1,18 @@
 import re
-import subprocess
+import subprocess  # noqa: F401
 
 import pytest
+import yaml
 from click.testing import CliRunner
 from cookiecutter.main import cookiecutter
 from kedro import __version__ as kedro_version
 from kedro.framework.cli.cli import TEMPLATE_PATH, info
+from kedro.framework.context import load_context
 
 from kedro_mlflow.framework.cli.cli import init as cli_init
 from kedro_mlflow.framework.cli.cli import mlflow_commands as cli_mlflow
 from kedro_mlflow.framework.cli.cli import ui as cli_ui
+from kedro_mlflow.framework.context import get_mlflow_config
 
 
 @pytest.fixture
@@ -63,22 +66,13 @@ def test_mlflow_commands_outside_kedro_project(monkeypatch, tmp_path):
     assert {"new"} == set(extract_cmd_from_help(result.output))
 
 
-def test_mlflow_commands_inside_non_initialized_kedro_project(
-    mocker, monkeypatch, tmp_path, kedro_project
-):
-    monkeypatch.chdir(tmp_path / "fake-project")
-    cli_runner = CliRunner()
-    result = cli_runner.invoke(cli_mlflow)
-    assert {"init"} == set(extract_cmd_from_help(result.output))
-    assert "You have not updated your template yet" in result.output
-
-
-def test_mlflow_commands_inside_initialized_kedro_project(
-    mocker, monkeypatch, tmp_path, kedro_project
+def test_mlflow_commands_inside_kedro_project(
+    monkeypatch,
+    tmp_path,
+    kedro_project,
 ):
     monkeypatch.chdir(tmp_path / "fake-project")
     # launch the command to initialize the project
-    subprocess.call(["kedro", "mlflow", "init"])
     cli_runner = CliRunner()
     result = cli_runner.invoke(cli_mlflow)
     assert {"init", "ui"} == set(extract_cmd_from_help(result.output))
@@ -97,8 +91,83 @@ def test_cli_init(monkeypatch, tmp_path, kedro_project):
     assert result.exit_code == 0
 
     # check mlflow.yml file
-    assert "'conf/base/mlflow.yml' successfully updated." in result.output
-    assert (project_path / "conf" / "base" / "mlflow.yml").is_file()
+    assert "'conf/local/mlflow.yml' successfully updated." in result.output
+    assert (project_path / "conf/local/mlflow.yml").is_file()
+
+
+def test_cli_init_existing_config(monkeypatch, tmp_path, kedro_project):
+    # "kedro_project" is a pytest.fixture declared in conftest
+    project_path = tmp_path / "fake-project"
+    monkeypatch.chdir(project_path)
+    cli_runner = CliRunner()
+
+    project_context = load_context(project_path.as_posix())
+    # emulate first call by writing a mlflow.yml file
+    yaml_str = yaml.dump(dict(mlflow_tracking_uri="toto"))
+    (project_path / project_context.CONF_ROOT / "local/mlflow.yml").write_text(yaml_str)
+
+    result = cli_runner.invoke(cli_init)
+
+    # check an error message is raised
+    assert "A 'mlflow.yml' already exists" in result.output
+
+    # check the file remains unmodified
+    assert get_mlflow_config(project_context).mlflow_tracking_uri.endswith("toto")
+
+
+def test_cli_init_existing_config_force_option(monkeypatch, tmp_path, kedro_project):
+    # "kedro_project" is a pytest.fixture declared in conftest
+    project_path = tmp_path / "fake-project"
+    monkeypatch.chdir(project_path)
+    cli_runner = CliRunner()
+
+    project_context = load_context(project_path.as_posix())
+    # emulate first call by writing a mlflow.yml file
+    yaml_str = yaml.dump(dict(mlflow_tracking_uri="toto"))
+    (project_path / project_context.CONF_ROOT / "local/mlflow.yml").write_text(yaml_str)
+
+    result = cli_runner.invoke(cli_init, args="--force")
+
+    # check an error message is raised
+    assert "successfully updated" in result.output
+
+    # check the file remains unmodified
+    assert get_mlflow_config(project_context).mlflow_tracking_uri.endswith("mlruns")
+
+
+@pytest.mark.parametrize(
+    "env",
+    ["base", "local"],
+)
+def test_cli_init_with_env(monkeypatch, tmp_path, kedro_project, env):
+    # "kedro_project" is a pytest.fixture declared in conftest
+    project_path = tmp_path / "fake-project"
+    monkeypatch.chdir(project_path)
+    cli_runner = CliRunner()
+    result = cli_runner.invoke(cli_init, f"--env {env}")
+
+    # FIRST TEST:
+    # the command should have executed propery
+    assert result.exit_code == 0
+
+    # check mlflow.yml file
+    assert f"'conf/{env}/mlflow.yml' successfully updated." in result.output
+    assert (project_path / "conf" / env / "mlflow.yml").is_file()
+
+
+@pytest.mark.parametrize(
+    "env",
+    ["debug"],
+)
+def test_cli_init_with_wrong_env(monkeypatch, tmp_path, kedro_project, env):
+    # "kedro_project" is a pytest.fixture declared in conftest
+    project_path = tmp_path / "fake-project"
+    monkeypatch.chdir(project_path)
+    cli_runner = CliRunner()
+    result = cli_runner.invoke(cli_init, f"--env {env}")
+
+    # A warning message should appear
+    assert f"No env '{env}' found" in result.output
 
 
 # TODO : This is a fake test. add a test to see if ui is properly up
diff --git a/tests/framework/context/test_mlflow_context.py b/tests/framework/context/test_mlflow_context.py
index f56acf9d..26606fee 100644
--- a/tests/framework/context/test_mlflow_context.py
+++ b/tests/framework/context/test_mlflow_context.py
@@ -1,11 +1,9 @@
+import pytest
 import yaml
 from kedro.framework.context import load_context
 
 from kedro_mlflow.framework.context import get_mlflow_config
-
-# def test_get_mlflow_config_outside_kedro_project(tmp_path, config_with_base_mlflow_conf):
-#     with pytest.raises(KedroMlflowConfigError, match="not a valid path to a kedro project"):
-#         get_mlflow_config(project_path=tmp_path,env="local")
+from kedro_mlflow.framework.context.config import KedroMlflowConfigError
 
 
 def _write_yaml(filepath, config):
@@ -56,6 +54,18 @@ def test_get_mlflow_config(mocker, tmp_path, config_dir):
     assert get_mlflow_config(context).to_dict() == expected
 
 
+def test_get_mlflow_config_in_uninitialized_project(mocker, tmp_path, config_dir):
+    # config_with_base_mlflow_conf is a pytest.fixture in conftest
+    mocker.patch("logging.config.dictConfig")
+    mocker.patch("kedro_mlflow.utils._is_kedro_project", return_value=True)
+
+    context = load_context(tmp_path)
+    with pytest.raises(
+        KedroMlflowConfigError, match="No 'mlflow.yml' config file found in environment"
+    ):
+        get_mlflow_config(context)
+
+
 def test_mlflow_config_with_templated_config(mocker, tmp_path, config_dir):
 
     _write_yaml(
diff --git a/tests/test_utils.py b/tests/test_utils.py
index 74dfb7cb..f1b9b463 100644
--- a/tests/test_utils.py
+++ b/tests/test_utils.py
@@ -1,11 +1,9 @@
 from pathlib import Path
 
 import pytest
-import yaml
 
 from kedro_mlflow.utils import (
     KEDRO_VERSION,
-    _already_updated,
     _get_project_globals,
     _is_kedro_project,
     _parse_requirements,
@@ -40,35 +38,6 @@ def test_validate_project_path(project_path, project_path_expected):
     assert project_path_result == project_path_expected
 
 
-def test_already_updated(tmp_path):
-    def _write_yaml(filepath, config):
-        filepath.parent.mkdir(parents=True, exist_ok=True)
-        yaml_str = yaml.dump(config)
-        filepath.write_text(yaml_str)
-
-    _write_yaml(
-        tmp_path / "conf" / "base" / "mlflow.yml",
-        dict(
-            mlflow_tracking_uri=(tmp_path / "mlruns").as_posix(),
-            experiment=dict(name="Default", create=True),
-            run=dict(id=None, name=None, nested=True),
-            ui=dict(port=None, host=None),
-        ),
-    )
-    flag = _already_updated(tmp_path)
-    expected_flag = True
-
-    return flag == expected_flag
-
-
-def test_not_yet_updated(tmp_path):
-
-    flag = _already_updated(tmp_path)
-    expected_flag = False
-
-    return flag == expected_flag
-
-
 @pytest.mark.parametrize(
     "project,is_kedro_project",
     (