diff --git a/CHANGELOG.md b/CHANGELOG.md index 238a56fd..d09a86dc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Changed + +- :memo: Change documentation theme to [the pydata sphinx theme](https://pydata-sphinx-theme.readthedocs.io/en/stable/#), and refactor sections to make them clearer ([#621](https://github.com/Galileo-Galilei/kedro-mlflow/pull/621)) + ## [0.14.0] - 2025-01-28 ### Added diff --git a/docs/conf.py b/docs/conf.py index cdb3ec61..931145db 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -44,11 +44,14 @@ # "sphinx.ext.ifconfig", # "sphinx.ext.viewcode", # "nbsphinx", + "sphinx_design", # responsive web component support "sphinx_copybutton", "sphinx_markdown_tables", "myst_parser", ] +myst_enable_extensions = ["colon_fence"] + # enable autosummary plugin (table of contents for modules/classes/class # methods) autosummary_generate = True @@ -71,12 +74,48 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. 
-# -html_theme = "sphinx_rtd_theme" + +html_theme = "pydata_sphinx_theme" # see: https://pydata-sphinx-theme.readthedocs.io/en/stable/user_guide/layout.html # useful to create dropdown with the name of the directory as the section name # see https://stackoverflow.com/questions/36925871/toctree-nested-drop-down: -html_theme_options = {"collapse_navigation": False} +html_theme_options = { + "logo": { + "image_light": "source/imgs/logo.png", + "image_dark": "source/imgs/logo.png", + }, + # https://pydata-sphinx-theme.readthedocs.io/en/stable/user_guide/header-links.html#fontawesome-icons + "icon_links": [ + { + "name": "GitHub", + "url": "https://github.com/Galileo-Galilei/kedro-mlflow", + "icon": "fa-brands fa-github", + }, + { + "name": "PyPI", + "url": "https://pypi.org/project/kedro-mlflow/", + "icon": "fa-brands fa-python", + }, + { + "name": "Slack", + "url": "https://kedro-org.slack.com/", + "icon": "fa-brands fa-slack", + }, + ], + "navbar_start": ["navbar-logo"], # "version-switcher" to be configured + "navbar_align": "content", + "header_links_before_dropdown": 4, + "secondary_sidebar_items": ["page-toc", "edit-this-page", "sourcelink"], + "use_edit_page_button": True, +} +html_context = { + "github_user": "Galileo-Galilei", + "github_repo": "kedro-mlflow", + "github_version": "master", + "doc_path": "docs/", # why not "docs/source/"? + "default_mode": "light", +} +html_sidebars = {"index": []} myst_heading_anchors = 5 diff --git a/docs/index.md b/docs/index.md new file mode 100644 index 00000000..ecea0920 --- /dev/null +++ b/docs/index.md @@ -0,0 +1,102 @@ +--- +myst: + html_meta: + "description lang=en": | + Top-level documentation for kedro-mlflow, with links to the rest + of the site. 
+html_theme.sidebar_secondary.remove: true +--- + +# The kedro-mlflow plugin + +```kedro-mlflow``` is a Kedro [plugin](https://docs.kedro.org/en/stable/extend_kedro/plugins.html) to integrate [MLflow](https://www.mlflow.org/) effortlessly inside [Kedro](https://kedro.org/) projects. + +Its main features are **automatic parameters tracking**, **datasets tracking as artifacts**, Kedro **pipelines packaging** and serving and **automatic synchronisation between training and inference** pipelines. It aims at providing a complete yet modular framework for high reproducibility of machine learning experiments and ease of deployment. + +::::{grid} 1 1 2 2 +:gutter: 3 + +:::{grid-item-card} +:link: source/03_experiment_tracking/01_experiment_tracking/01_configuration.html +:link-type: url +:class-header: bg-light + +{fas}`flask fa-xl;pst-color-primary` Experiment tracking +^^^ + +Track the **parameters**, **metrics**, **artifacts** and **models** of your kedro pipelines for reproducibility. +::: + +:::{grid-item-card} +:link: source/04_pipeline_as_model/01_pipeline_as_custom_model/01_mlflow_models.html +:link-type: url +:class-header: bg-light + +{fas}`rocket fa-xl;pst-color-primary` Pipeline as model +^^^ + +Package any kedro pipeline to a **custom mlflow model** for deployment and serving. The custom model for an inference pipeline can be **registered** in mlflow **automatically** at the end of each training in a *scikit-learn* like way. +::: + +:::: + +## Resources + +::::{grid} 1 1 3 3 +:gutter: 3 + +:::{grid-item-card} +:link: source/02_getting_started/01_installation/01_installation.html +:link-type: url +:class-header: bg-light + +{fas}`fa-solid fa-graduation-cap fa-xl;pst-color-primary` Quickstart +^^^ + +Get started in **1 mn** with experiment tracking! 
++++ +Try out {fas}`arrow-right fa-xl` +::: + +:::{grid-item-card} +:link: https://github.com/Galileo-Galilei/kedro-mlflow-tutorial +:link-type: url +:class-header: bg-light + +{fas}`fa-solid fa-chalkboard-user fa-xl;pst-color-primary` Advanced tutorial +^^^ + +The ``kedro-mlflow-tutorial`` github repo contains a step-by-step tutorial to learn how to use kedro-mlflow as a mlops framework! + ++++ +Try on github {fab}`github;fa-xl` +::: + +:::{grid-item-card} +:link: https://www.youtube.com/watch?v=Az_6UKqbznw +:link-type: url +:class-header: bg-light + +{fas}`fa-solid fa-video fa-xl;pst-color-primary` Demonstration in video +^^^ + +A youtube video by the kedro team to introduce the plugin, with live coding. + ++++ +See on youtube {fab}`youtube;fa-xl` +::: + +:::: + +```{toctree} +--- +maxdepth: 1 +hidden: true +--- +source/01_introduction/index +source/02_getting_started/index +source/03_experiment_tracking/index +source/04_pipeline_as_model/index +source/05_API/index +Changelog +``` diff --git a/docs/index.rst b/docs/index.rst deleted file mode 100644 index f7f4e8c0..00000000 --- a/docs/index.rst +++ /dev/null @@ -1,43 +0,0 @@ -.. ``kedro-mlflow`` documentation master file, created by - sphinx-quickstart on Mon Jul 13 14:21:13 2020. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. - -Welcome to kedro-mlflow's documentation! -======================================== - -.. toctree:: - :maxdepth: -1 - :caption: Getting started - - Introduction - Installation - Quickstart in 1 mn - -.. toctree:: - :maxdepth: -1 - :caption: Experiment tracking - - In a kedro project - In a notebook - -.. toctree:: - :maxdepth: -1 - :caption: Pipeline serving - - Custom mlflow model for kedro pipelines - A mlops framework for continuous model serving - -.. 
toctree:: - :maxdepth: -1 - :caption: Technical documentation - - Python objects - API documentation - -Indices and tables -================== - -* :ref:`genindex` -* :ref:`modindex` -* :ref:`search` diff --git a/docs/source/01_introduction/01_introduction.md b/docs/source/01_introduction/01_introduction.md index 323cdee3..d29916f7 100644 --- a/docs/source/01_introduction/01_introduction.md +++ b/docs/source/01_introduction/01_introduction.md @@ -1,4 +1,4 @@ -# Introduction +# Introduction to ``kedro`` and ``mlflow`` ## What is ``Kedro``? diff --git a/docs/source/01_introduction/02_motivation.md b/docs/source/01_introduction/02_motivation.md index 5b7caeb4..9e1cb8f7 100644 --- a/docs/source/01_introduction/02_motivation.md +++ b/docs/source/01_introduction/02_motivation.md @@ -1,8 +1,8 @@ -# Motivation +# Motivation behind the plugin ## When should I use kedro-mlflow? -Basically, you should use `kedro-mlflow` in **any `Kedro` project which involves machine learning** / deep learning. As stated in the [introduction](./01_introduction.md), `Kedro`'s current versioning (as of version `0.16.6`) is not sufficient for machine learning projects: it lacks a UI and a ``run`` management system. Besides, the `KedroPipelineModel` ability to serve a kedro pipeline as an API or a batch in one line of code is a great addition for collaboration and transition to production. +Basically, you should use `kedro-mlflow` in **any `Kedro` project which involves machine learning** / deep learning. As stated in the [introduction](./01_introduction.html), `Kedro`'s current versioning (as of version `0.19.10`) is not sufficient for machine learning projects: it lacks a UI and a ``run`` management system. Besides, the `KedroPipelineModel` ability to serve a kedro pipeline as an API or a batch in one line of code is a great addition for collaboration and transition to production. 
If you do not use ``Kedro`` or if you do pure data processing which does not involve *machine learning*, this plugin is not what you are seeking for ;-) @@ -47,5 +47,5 @@ Above implementations have the advantage of being very straightforward and *mlfl `kedro-mlflow` does not currently provide interface to set tags outside a Kedro ``Pipeline``. Some of above decisions are subject to debate and design decisions (for instance, metrics are often updated in a loop during each epoch / training iteration and it does not always make sense to register the metric between computation steps, e.g. as a an I/O operation after a node run). ```{note} -You do **not** need any ``MLProject`` file to use mlflow inside your Kedro project. As seen in the [introduction](./01_introduction.md), this file overlaps with Kedro configuration files. +You do **not** need any ``MLProject`` file to use mlflow inside your Kedro project. As seen in the [introduction](./01_introduction.html), this file overlaps with Kedro configuration files. ``` diff --git a/docs/source/01_introduction/index.md b/docs/source/01_introduction/index.md new file mode 100644 index 00000000..9e069551 --- /dev/null +++ b/docs/source/01_introduction/index.md @@ -0,0 +1,8 @@ +# Introduction + +```{toctree} +:caption: Introduction to kedro-mlflow + +01_introduction +02_motivation +``` diff --git a/docs/source/01_introduction/index.rst b/docs/source/01_introduction/index.rst deleted file mode 100644 index e4ce05c3..00000000 --- a/docs/source/01_introduction/index.rst +++ /dev/null @@ -1,8 +0,0 @@ -Introduction -============ - -.. 
toctree:: - :maxdepth: 4 - - Kedro vs Mlflow <01_introduction.md> - Motivation behind the plugin <02_motivation.md> diff --git a/docs/source/02_installation/01_installation.md b/docs/source/02_getting_started/01_installation/01_installation.md similarity index 71% rename from docs/source/02_installation/01_installation.md rename to docs/source/02_getting_started/01_installation/01_installation.md index 8283985b..fcd7f5df 100644 --- a/docs/source/02_installation/01_installation.md +++ b/docs/source/02_getting_started/01_installation/01_installation.md @@ -4,7 +4,7 @@ ### Create a virtual environment -I strongly recommend to use ``conda`` (a package manager) to create an environment in order to avoid version conflicts between packages. +I strongly recommend to create a virtual environment in order to avoid version conflicts between packages. I use ``conda`` in this tutorial. I also recommend to read [Kedro installation guide](https://kedro.readthedocs.io/en/latest/get_started/install.html) to set up your Kedro project. @@ -12,7 +12,7 @@ I also recommend to read [Kedro installation guide](https://kedro.readthedocs.io conda create -n python=<3.[6-8].X> ``` -For the rest of the section, we assume the envirpnment is activated: +For the rest of the section, we assume the environment is activated: ```console conda activate @@ -42,9 +42,11 @@ Requires: pip-tools, cachetools, fsspec, toposort, anyconfig, PyYAML, click, plu ## Install the plugin -There are version of the plugin compatible up to ``kedro>=0.16.0`` and ``mlflow>=0.8.0``. ``kedro-mlflow`` stops adding features to a minor version 2 to 6 months after a new kedro release. +There are versions of the plugin compatible up to ``kedro>=0.16.0`` and ``mlflow>=0.8.0``. ``kedro-mlflow`` stops adding features to a minor version 2 to 6 months after a new kedro release. 
-### Install from PyPI +::::{tab-set} + +:::{tab-item} Install with pip You can install ``kedro-mlflow`` plugin from ``PyPi`` with `pip`: @@ -52,17 +54,34 @@ You can install ``kedro-mlflow`` plugin from ``PyPi`` with `pip`: pip install --upgrade kedro-mlflow ``` -### Install from sources +::: + +:::{tab-item} Install with conda / mamba / micromamba -You may want to install the master branch which has unreleased features: +You can install ``kedro-mlflow`` plugin with `conda` from the ``conda-forge`` channel: + +```console +conda install kedro-mlflow -c conda-forge +``` + +::: + +:::{tab-item} Install from github + +You may want to install the master branch from source which has unreleased features: ```console pip install git+https://github.com/Galileo-Galilei/kedro-mlflow.git ``` +::: + +:::: + + ## Check the installation -Type ``kedro info`` in a terminal to check the installation. If it has succeeded, you should see the following ascii art: +Enter ``kedro info`` in a terminal with the activated virtual env to check the installation. If it has succeeded, you should see the following ascii art: ```console _ _ diff --git a/docs/source/02_installation/02_setup.md b/docs/source/02_getting_started/01_installation/02_setup.md similarity index 73% rename from docs/source/02_installation/02_setup.md rename to docs/source/02_getting_started/01_installation/02_setup.md index 3744ead7..ccb6df08 100644 --- a/docs/source/02_installation/02_setup.md +++ b/docs/source/02_getting_started/01_installation/02_setup.md @@ -1,21 +1,21 @@ # Initialize your Kedro project -This section assume that [you have installed `kedro-mlflow` in your virtual environment](./01_installation.md). +This section assume that [you have installed `kedro-mlflow` in your virtual environment](./01_installation.html). ## Create a kedro project This plugin must be used in an existing kedro project. If you do not have a kedro project yet, you can create it with ``kedro new`` command. 
[See the kedro docs for a tutorial](https://kedro.readthedocs.io/en/latest/get_started/new_project.html). -If you do not have a real-world project, you can use a kedro example and [follow the "Quickstart in 1 mn" example](../03_quickstart/01_example_project.md) to make a demo of this plugin out of the box. +If you do not have a real-world project, you can use a kedro example and [follow the "Quickstart in 1 mn" example](../02_quickstart/01_example_project.html) to make a demo of this plugin out of the box. ## Activate `kedro-mlflow` in your kedro project -In order to use the ``kedro-mlflow`` plugin, you need to setup its configuration and declare its hooks. Those 2 actions are detailled in the following paragraphs. +In order to use the ``kedro-mlflow`` plugin, you need to setup its configuration and declare its hooks. ### Setting up the ``kedro-mlflow`` configuration file -``kedro-mlflow`` is [configured](../30_python_objects/05_Configuration.md) through an ``mlflow.yml`` file. The recommended way to initialize the `mlflow.yml` is by using [the ``kedro-mlflow`` CLI](../30_python_objects/04_CLI.md), but you can create it manually. +``kedro-mlflow`` is [configured](../../05_API/01_python_objects/05_Configuration.html) through an ``mlflow.yml`` file. The recommended way to initialize the `mlflow.yml` is by using [the ``kedro-mlflow`` CLI](../../05_API/01_python_objects/04_CLI.html), but you can create it manually. ```{note} Since ``kedro-mlflow>=0.11.2``, the configuration file is optional. However, the plugin will use default ``mlflow`` configuration. Specifically, the runs will be stored in a ``mlruns`` folder at the root fo the kedro project since no ``mlflow_tracking_uri`` is configured. @@ -49,13 +49,19 @@ kedro mlflow init --env= ``kedro_mlflow`` hooks implementations must be registered with Kedro. There are 2 ways of registering [hooks](https://kedro.readthedocs.io/en/latest/hooks/introduction.html). 
-**Note that you must register the hook provided by kedro-mlflow** (``MlflowHook``) to make the plugin work. +```{important} +You must register the hook provided by ``kedro-mlflow`` (the ``MlflowHook``) to make the plugin work. +``` + +::::{tab-set} -#### Declaring hooks through auto-discovery (for `kedro>=0.16.4`) [Default behaviour] +:::{tab-item} `kedro>=0.16.4` - auto-discovery If you use `kedro>=0.16.4`, `kedro-mlflow` hooks are auto-registered automatically by default without any action from your side. You can [disable this behaviour](https://kedro.readthedocs.io/en/latest/hooks/introduction.html#disable-auto-registered-plugins-hooks) in your `settings.py` file. -#### Declaring hooks statically in settings.py +::: + +:::{tab-item} `kedro>=0.16.0, <=0.16.3` - register in ``settings.py`` If you have turned off plugin automatic registration, you can register its hooks manually by [adding them to ``settings.py``](https://kedro.readthedocs.io/en/latest/hooks/introduction.html#registering-your-hook-implementations-with-kedro): @@ -65,3 +71,7 @@ from kedro_mlflow.framework.hooks import MlflowHook HOOKS = (MlflowHook(),) ``` + +::: + +:::: diff --git a/docs/source/02_installation/03_migration_guide.md b/docs/source/02_getting_started/01_installation/03_migration_guide.md similarity index 95% rename from docs/source/02_installation/03_migration_guide.md rename to docs/source/02_getting_started/01_installation/03_migration_guide.md index 33b8c8fb..aa02e0e2 100644 --- a/docs/source/02_installation/03_migration_guide.md +++ b/docs/source/02_getting_started/01_installation/03_migration_guide.md @@ -64,7 +64,7 @@ This is not necessary: the mlflow config is automatically set up when the contex - Update the ``mlflow.yml`` configuration file with ``kedro mlflow init --force`` command - `pipeline_ml_factory(pipeline_ml=,...)` (resp. `KedroPipelineModel(pipeline_ml=, ...)`) first argument is renamed `pipeline`. Change the call to `pipeline_ml_factory(pipeline=)` (resp. 
`KedroPipelineModel(pipeline=, ...)`). -- Change the call from `pipeline_ml_factory(..., model_signature=, conda_env=, model_name=)` to `` pipeline_ml_factory(..., log_model_kwargs=dict(signature=, conda_env=, artifact_path=})`. Notice that the arguments are renamed to match mlflow's and they are passed as a dict in `log_model_kwargs`. +- Change the call from `pipeline_ml_factory(..., model_signature=, conda_env=, model_name=)` to ``pipeline_ml_factory(..., log_model_kwargs=dict(signature=, conda_env=, artifact_path=})`. Notice that the arguments are renamed to match mlflow's and they are passed as a dict in `log_model_kwargs`. ## Migration from 0.6.x to 0.7.x diff --git a/docs/source/02_getting_started/02_quickstart/00_intro_tutorial.md b/docs/source/02_getting_started/02_quickstart/00_intro_tutorial.md new file mode 100644 index 00000000..459c1067 --- /dev/null +++ b/docs/source/02_getting_started/02_quickstart/00_intro_tutorial.md @@ -0,0 +1,5 @@ +# Goal of the tutorial + +This "Getting started" section demonstrates how to use some basic functionalities of `kedro-mlflow` in an end to end example. It is supposed to be simple and self-contained and is partially redundant with other sections, but far from complete. + +The **section only focuses on experiment tracking** part and **does _not_ show the "machine learning framework" abilities** of the plugin. The goal is to give a new user a quick glance at some capabilities so that they can decide whether the plugin suits their needs or not. It is totally worth checking the other sections to have a much more complete overview of what this plugin provides. 
diff --git a/docs/source/03_quickstart/01_example_project.md b/docs/source/02_getting_started/02_quickstart/01_example_project.md similarity index 89% rename from docs/source/03_quickstart/01_example_project.md rename to docs/source/02_getting_started/02_quickstart/01_example_project.md index 2c4b652e..f11f7abf 100644 --- a/docs/source/03_quickstart/01_example_project.md +++ b/docs/source/02_getting_started/02_quickstart/01_example_project.md @@ -20,13 +20,18 @@ We use this project because: - it is compatible with older version of ``Kedro`` so newcomers are used to it - it is maintained by ``Kedro`` maintainers and therefore enforces some best practices. -### Installation with ``kedro>=0.19.0`` + +::::{tab-set} + +:::{tab-item} ``kedro>=0.19.0`` ```{warning} For ``kedro>=0.19.0``, ``pandas-iris`` starter has been removed. It is recommended to install [``spaceflights-pandas`` starter instead](https://github.com/kedro-org/kedro-starters/tree/main/spaceflights-pandas). ``` -### Installation with ``kedro>=0.16.3`` +::: + +:::{tab-item} ``kedro>=0.16.3,<0.19`` The default starter is now called "pandas-iris". In a new console, enter: @@ -58,7 +63,9 @@ Lowercase is recommended. Package name must start with a letter or underscore. [kedro_mlflow_example]: km_example ``` -### Installation with ``kedro>=0.16.0, <=0.16.2`` +::: + +:::{tab-item} ``kedro>=0.16.0, <=0.16.2`` With older versions of ``Kedro``, the starter option is not available, but this ``kedro new`` provides an "Include example" question. Answer ``y`` to this question to get the same starter as above. In a new console, enter: @@ -96,6 +103,10 @@ Good for first-time users. 
(default=N) [y/N]: y ``` +::: + +:::: + ## Install dependencies Move to the project directory: @@ -104,7 +115,11 @@ Move to the project directory: cd km-example ``` -Install the project dependencies (**Warning: Do not use ``kedro install`` commands [does not install the packages in your activated environment](https://github.com/quantumblacklabs/kedro/issues/589)**): +Install the project dependencies : + +```{warning} +Do not use ``kedro install`` commands which [does not install the packages in your activated environment](https://github.com/quantumblacklabs/kedro/issues/589). It has been removed in ``kedro>=0.19``. +``` ```console pip install -r src/requirements.txt diff --git a/docs/source/03_quickstart/02_first_steps.md b/docs/source/02_getting_started/02_quickstart/02_first_steps.md similarity index 90% rename from docs/source/03_quickstart/02_first_steps.md rename to docs/source/02_getting_started/02_quickstart/02_first_steps.md index 24cf114d..4eb10f92 100644 --- a/docs/source/03_quickstart/02_first_steps.md +++ b/docs/source/02_getting_started/02_quickstart/02_first_steps.md @@ -6,6 +6,7 @@ This step is optional if you use ``kedro>=0.11.2``. If you do not create a ``mlflow.yml`` configuration file, ``kedro-mlflow`` will use the defaults. However this is heavily recommended because in professional setup you often need some specific enterprise configuration. 
``` +:::{dropdown} (Optional) Create a configuration file You can initialize your project with the plugin-specific configuration file with this command: ```console @@ -20,12 +21,16 @@ You will see the following message: The ``conf/local`` folder is updated and you can see the `mlflow.yml` file: -![initialized_project](../imgs/initialized_project.png) +![initialized_project](../../imgs/initialized_project.png) +::: -*Optional: If you have configured your own mlflow server, you can specify the tracking uri in the ``mlflow.yml`` (replace the highlighted line below):* +:::{dropdown} (Optional) Specify the tracking uri -![mlflow_yml](../imgs/mlflow_yml.png) +If you have configured your own mlflow server, you can specify the tracking uri in the ``mlflow.yml`` (replace the highlighted line below): + +![mlflow_yml](../../imgs/mlflow_yml.png) +::: ## Run the pipeline @@ -67,7 +72,7 @@ If the pipeline executes properly, you should see the following log: Since we have kept the default value of the ``mlflow.yml``, the tracking uri (the place where runs are recorded) is a local ``mlruns`` folder which has just been created with the execution: -![once_run_project](../imgs/once_run_project.png) +![once_run_project](../../imgs/once_run_project.png) ## Open the UI @@ -81,13 +86,13 @@ And open the following adress in your favorite browser ``http://localhost:5000/`` -![mlflow_host_page](../imgs/mlflow_host_page.png) +![mlflow_host_page](../../imgs/mlflow_host_page.png) Click now on the last run executed, you will land on this page: -![mlflow_run](../imgs/mlflow_run.png) +![mlflow_run](../../imgs/mlflow_run.png) -### Parameters versioning +### Parameters tracking Note that the parameters have been recorded *automagically*. 
Here, two parameters format are used: @@ -104,18 +109,16 @@ kedro viz Open your browser at the following adress: -```{browser} -http://localhost:4141/ +```{button-link} http://localhost:4141/ ``` You should see the following graph: -![kedro_viz_params](../imgs/kedro_viz_params.png) +![kedro_viz_params](../../imgs/kedro_viz_params.png) which indicates clearly which parameters are logged (in the red boxes with the "parameter" icon). - -### Artifacts +### Artifacts tracking With this run, artifacts are empty. This is expected: mlflow does not know what it should log and it will not log all your data by default. However, you want to save your model (at least) or your run is likely useless! @@ -150,7 +153,7 @@ example_model: Rerun the pipeline (with `kedro run`), and reopen the UI. Select the last run and see that the file was uploaded: -![run_with_artifact](../imgs/run_with_artifact.png) +![run_with_artifact](../../imgs/run_with_artifact.png) This works for any type of file (including images with ``MatplotlibWriter``) and the UI even offers a preview for ``png`` and ``csv``, which is really convenient to compare runs. diff --git a/docs/source/02_getting_started/index.md b/docs/source/02_getting_started/index.md new file mode 100644 index 00000000..ff01e5d0 --- /dev/null +++ b/docs/source/02_getting_started/index.md @@ -0,0 +1,17 @@ +# {octicon}`mortar-board` Getting started + +```{toctree} +:caption: Installation + +01_installation/01_installation +01_installation/02_setup +01_installation/03_migration_guide +``` + +```{toctree} +:caption: Quickstart + +02_quickstart/00_intro_tutorial +02_quickstart/01_example_project +02_quickstart/02_first_steps +``` diff --git a/docs/source/02_installation/index.rst b/docs/source/02_installation/index.rst deleted file mode 100644 index 968517fd..00000000 --- a/docs/source/02_installation/index.rst +++ /dev/null @@ -1,10 +0,0 @@ -Introduction -============ - -.. 
toctree:: - :maxdepth: 4 - - - Install the plugin <01_installation.md> - Setup your kedro project <02_setup.md> - Migration guide between versions <03_migration_guide.md> diff --git a/docs/source/10_experiment_tracking/01_configuration.md b/docs/source/03_experiment_tracking/01_experiment_tracking/01_configuration.md similarity index 96% rename from docs/source/10_experiment_tracking/01_configuration.md rename to docs/source/03_experiment_tracking/01_experiment_tracking/01_configuration.md index de17119f..19544c81 100644 --- a/docs/source/10_experiment_tracking/01_configuration.md +++ b/docs/source/03_experiment_tracking/01_experiment_tracking/01_configuration.md @@ -1,12 +1,12 @@ # Configure mlflow inside your project -We assume in this section that you have [installed `kedro-mlflow` in your virtual environment](../02_installation/01_installation.md) and you have [configured your project](../02_installation/02_setup.md) with a `mlflow.yml` configuration file and hooks declaration. +We assume in this section that you have [installed `kedro-mlflow` in your virtual environment](../../02_getting_started/01_installation/01_installation.html) and you have [configured your project](../../02_getting_started/01_installation/02_setup.html) with a `mlflow.yml` configuration file and hooks declaration. ## Context: mlflow tracking under the hood -Mlflow is composed of four modules which are described in the [introduction section](../01_introduction/01_introduction.md). The main module is "tracking". The goal of this module is to keep track of every varying parameters across different code execution (parameters, metrics and artifacts). The following schema describes how this modules operates under the hood: +Mlflow is composed of four modules which are described in the [introduction section](../../01_introduction/01_introduction.html). The main module is "tracking". 
The goal of this module is to keep track of every varying parameters across different code execution (parameters, metrics and artifacts). The following schema describes how this modules operates under the hood: -![mlflow_tracking_schema](../imgs/mlflow_tracking_schema.png) +![mlflow_tracking_schema](../../imgs/mlflow_tracking_schema.png) Basically, this schema shows that mlflow separates WHERE the artifacts are logged from HOW they are logged inside your code. You need to setup your mlflow tracking server separately from your code, and then each logging will send a request to the tracking server to store the elements you want to track in the appropriate location. The advantage of such a setup are numerous: diff --git a/docs/source/10_experiment_tracking/02_version_parameters.md b/docs/source/03_experiment_tracking/01_experiment_tracking/02_version_parameters.md similarity index 55% rename from docs/source/10_experiment_tracking/02_version_parameters.md rename to docs/source/03_experiment_tracking/01_experiment_tracking/02_version_parameters.md index 41cb0b8f..6e87dad1 100644 --- a/docs/source/10_experiment_tracking/02_version_parameters.md +++ b/docs/source/03_experiment_tracking/01_experiment_tracking/02_version_parameters.md @@ -1,25 +1,29 @@ -# Parameters versioning +# Track parameters -## Automatic parameters versioning +## Automatic parameters tracking -Parameters versioning is automatic when the ``MlflowHook`` is added to [the hook list of the ``ProjectContext``](https://kedro-mlflow.readthedocs.io/en/latest/source/02_installation/02_setup.html#declaring-kedro-mlflow-hooks). The `mlflow.yml` configuration file has a parameter called ``flatten_dict_params`` which enables to [log as distinct parameters the (key, value) pairs of a ```Dict`` parameter](../30_python_objects/02_Hooks.md). +Parameters tracking is automatic when the ``MlflowHook`` is added to [the hook list of the ``ProjectContext``](../../02_getting_started/01_installation/02_setup.html). 
The `mlflow.yml` configuration file has a parameter called ``flatten_dict_params`` which enables to [log as distinct parameters the (key, value) pairs of a ```Dict`` parameter](../../05_API/01_python_objects/02_Hooks.html). You **do not need any additional configuration** to benefit from parameters versioning. -## How does ``MlflowHook`` operates under the hood? +```{hint} -The [medium post which introduces hooks](https://medium.com/quantumblack/introducing-kedro-hooks-fd5bc4c03ff5) explains in detail the differents execution steps ``Kedro`` executes when the user calls the ``kedro run`` command. +**How does ``MlflowHook`` operates under the hood?** -![](../imgs/hook_registration_process.png) +The [medium post which introduces hooks](https://medium.com/quantumblack/introducing-kedro-hooks-fd5bc4c03ff5) explains in detail the steps ``Kedro`` executes when the user calls the ``kedro run`` command. + +![](../../imgs/hook_registration_process.png) The `MlflowHook` registers the parameters before each node (entry point number 3 on above picture) by calling `mlflow.log_parameter(param_name, param_value)` on each parameters of the node. +``` + ## Frequently asked questions -### Will parameters be recorded if the pipeline fails during execution? +:::{dropdown} How are parameters detected by the plugin? +The hook **detects parameters through their prefix ``params:`` or the value ``parameters``**. These are the [reserved keywords used by Kedro to define parameters](https://docs.kedro.org/en/stable/configuration/parameters.html#how-to-use-parameters) in the ``pipeline.py`` file(s). +::: +:::{dropdown} Will parameters be recorded if the pipeline fails during execution? The parameters are registered node by node (and not in a single batch at the beginning of the execution). If the pipeline fails in the middle of its execution, the **parameters of the nodes who have been run will be recorded**, but **not the parameters of non executed nodes**. 
- -### How are parameters detected by the plugin? - -The hook **detects parameters through their prefix ``params:`` or the value ``parameters``**. These are the [reserved keywords used by Kedro to define parameters](https://docs.kedro.org/en/stable/configuration/parameters.html#how-to-use-parameters) in the ``pipeline.py`` file(s). +::: diff --git a/docs/source/10_experiment_tracking/03_version_datasets.md b/docs/source/03_experiment_tracking/01_experiment_tracking/03_version_datasets.md similarity index 89% rename from docs/source/10_experiment_tracking/03_version_datasets.md rename to docs/source/03_experiment_tracking/01_experiment_tracking/03_version_datasets.md index 7e876547..e3fd7dc3 100644 --- a/docs/source/10_experiment_tracking/03_version_datasets.md +++ b/docs/source/03_experiment_tracking/01_experiment_tracking/03_version_datasets.md @@ -1,4 +1,4 @@ -# Versioning Kedro DataSets +# Track Datasets as artifacts ## What is artifact tracking? @@ -10,16 +10,16 @@ Mlflow defines artifacts as "any data a user may want to track during code execu Artifacts are a very flexible and convenient way to "bind" any data type to your code execution. Mlflow has a two-step process for such binding: 1. Persist the data locally in the desired file format -2. Upload the data to the [artifact store](./01_configuration.md) +2. Upload the data to the [artifact store](./01_configuration.html) -## How to version data in a kedro project? +## How to track data in a kedro project? ``kedro-mlflow`` introduces a new ``AbstractDataset`` called ``MlflowArtifactDataset``. It is a wrapper for any ``AbstractDataset`` which decorates the underlying dataset ``save`` method and logs the file automatically in mlflow as an artifact each time the ``save`` method is called. Since it is an ``AbstractDataset``, it can be used with the YAML API. 
Assume that you have the following entry in the ``catalog.yml``: ```yaml -my_dataset_to_version: +my_dataset_to_track: type: pandas.CSVDataset filepath: /path/to/a/destination/file.csv ``` @@ -27,7 +27,7 @@ my_dataset_to_version: You can change it to: ```yaml -my_dataset_to_version: +my_dataset_to_track: type: kedro_mlflow.io.artifacts.MlflowArtifactDataset dataset: type: pandas.CSVDataset # or any valid kedro DataSet @@ -38,7 +38,8 @@ and this dataset will be automatically versioned in each pipeline execution. ## Frequently asked questions - -### Can I pass extra parameters to the ``MlflowArtifactDataset`` for finer control? + +:::{dropdown} Can I pass extra parameters to the ``MlflowArtifactDataset`` for finer control? The ``MlflowArtifactDataset`` takes a ``dataset`` argument which is a python dictionary passed to the ``__init__`` method of the dataset declared in ``type``. It means that you can pass any argument accepted by the underlying dataset in this dictionary. If you want to pass ``load_args`` and ``save_args`` in the previous example, add them in the ``dataset`` argument: @@ -55,7 +56,9 @@ my_dataset_to_version: # ... any other valid arguments for dataset ``` -### Can I use the ``MlflowArtifactDataset`` in interactive mode? +::: + +:::{dropdown} Can I use the ``MlflowArtifactDataset`` in interactive mode? Like all Kedro ``AbstractDataset``, ``MlflowArtifactDataset`` is callable in the python API: @@ -71,8 +74,10 @@ csv_dataset = MlflowArtifactDataSet( ) csv_dataset.save(data=pd.DataFrame({"a": [1, 2], "b": [3, 4]})) ``` +::: + -### How do I upload an artifact to a non local destination (e.g. an S3 or blog storage)? + +:::{dropdown} How do I upload an artifact to a non local destination (e.g. an S3 or blob storage)? The location where artifact will be stored does not depends of the logging function but rather on the artifact store specified when configuring the mlflow server.
Read mlflow documentation to see: @@ -86,8 +91,9 @@ You still need to specify a **local** path for the underlying dataset (even to s ``` You can refer to [this issue](https://github.com/Galileo-Galilei/kedro-mlflow/issues/15) for further details. +::: -### Can I log an artifact in a specific run? +:::{dropdown} Can I log an artifact in a specific run? The ``MlflowArtifactDataset`` has an extra attribute ``run_id`` which specifies the run you will log the artifact in. **Be cautious, because this argument will take precedence over the current run** when you call ``kedro run``, causing the artifact to be logged in another run that all the other data of the run. @@ -100,7 +106,9 @@ my_dataset_to_version: run_id: 13245678910111213 # a valid mlflow run to log in. If None, default to active run ``` -### Can I reload an artifact from an existing run to use it in another run ? +::: + +:::{dropdown} Can I reload an artifact from an existing run to use it in another run ? You may want to reuse th artifact of a previous run to reuse it in another one, e.g. to continue training from a pretrained model, or to select the best model among several runs created during an hyperparamter tuning. The ``MlflowArtifactDataset`` has an extra attribute ``run_id`` you can use to specify from which run you will load the artifact from. **Be cautious**, because: - this argument will take precedence over the current run** when you call ``kedro run``, causing the artifact to be loaded from another run that all the other data of the run @@ -114,8 +122,9 @@ my_dataset_to_reload: filepath: /path/to/a/local/destination/file.csv # must be a local filepath, no matter what is your actual mlflow storage (S3 or other) run_id: 13245678910111213 # a valid mlflow run with the existing artifact. It must be named "file.csv" ``` +::: -### Can I create a remote folder/subfolders architecture to organize the artifacts? +:::{dropdown} Can I create a remote folder/subfolders architecture to organize the artifacts? 
The ``MlflowArtifactDataset`` has an extra argument ``artifact_path`` which specifies a remote subfolder where the artifact will be logged. It must be a relative path. @@ -129,3 +138,5 @@ my_dataset_to_version: filepath: /path/to/a/local/destination/file.csv artifact_path: reporting # relative path where the remote artifact must be stored. if None, saved in root folder. ``` + +::: diff --git a/docs/source/10_experiment_tracking/04_version_models.md b/docs/source/03_experiment_tracking/01_experiment_tracking/04_version_models.md similarity index 94% rename from docs/source/10_experiment_tracking/04_version_models.md rename to docs/source/03_experiment_tracking/01_experiment_tracking/04_version_models.md index 1341869f..16ae4815 100644 --- a/docs/source/10_experiment_tracking/04_version_models.md +++ b/docs/source/03_experiment_tracking/01_experiment_tracking/04_version_models.md @@ -1,4 +1,4 @@ -# Version model +# Track models ## What is model tracking? @@ -21,13 +21,13 @@ my_sklearn_model: flavor: mlflow.sklearn ``` -More informations on available parameters are available in the [dedicated section](../30_python_objects/01_DataSets.md#mlflowmodeltrackingdataset). +More informations on available parameters are available in the [dedicated section](../../05_API/01_python_objects/01_Datasets.html#mlflowmodeltrackingdataset). You are now able to use ``my_sklearn_model`` in your nodes. Since this model is registered in mlflow, you can also leverage the [mlflow model serving abilities](https://www.mlflow.org/docs/latest/cli.html#mlflow-models-serve) or [predicting on batch abilities](https://www.mlflow.org/docs/latest/cli.html#mlflow-models-predict), as well as the [mlflow models registry](https://www.mlflow.org/docs/latest/model-registry.html) to manage the lifecycle of this model. ## Frequently asked questions -### How is it working under the hood? +:::{dropdown} How is it working under the hood? 
**For ``MlflowModelTrackingDataset``** @@ -40,8 +40,9 @@ During load, the model is retrieved from the ``run_id`` if specified, else it is During save, a model object from node output is saved locally under specified ``filepath`` using ``save_model`` function of the specified ``flavor``. When model is loaded, the latest version stored locally is read using ``load_model`` function of the specified ``flavor``. You can also load a model from a specific kedro run by specifying the `version` argument to the constructor. +::: -### How can I track a custom MLflow model flavor? +:::{dropdown} How can I track a custom MLflow model flavor? To track a custom MLflow model flavor you need to set the `flavor` parameter to import the module of your custom flavor and to specify a [pyfunc workflow](https://mlflow.org/docs/latest/python_api/mlflow.pyfunc.html#pyfunc-create-custom-workflows) which can be set either to `python_model` or `loader_module`. The former is the more high level and user friendly and is [recommend by mlflow](https://mlflow.org/docs/latest/python_api/mlflow.pyfunc.html#which-workflow-is-right-for-my-use-case) while the latter offer more control. We haven't tested the integration in `kedro-mlflow` of this second workflow extensively, and it should be used with caution. @@ -52,8 +53,12 @@ my_custom_model: pyfunc_workflow: python_model # or loader_module ``` +::: + ### How can I save model locally and log it in MLflow in one step? +:::{dropdown} How can I save model locally and log it in MLflow in one step? + If you want to save your model both locally and remotely within the same run, you can leverage `MlflowArtifactDataset`: ```yaml @@ -66,3 +71,5 @@ sklearn_model: ``` This might be useful if you want to always read the lastest model saved locally and log it to MLflow each time the new model is being trained for tracking purpose. 
+ +::: diff --git a/docs/source/10_experiment_tracking/05_version_metrics.md b/docs/source/03_experiment_tracking/01_experiment_tracking/05_version_metrics.md similarity index 99% rename from docs/source/10_experiment_tracking/05_version_metrics.md rename to docs/source/03_experiment_tracking/01_experiment_tracking/05_version_metrics.md index 447c41d2..ac0c55d2 100644 --- a/docs/source/10_experiment_tracking/05_version_metrics.md +++ b/docs/source/03_experiment_tracking/01_experiment_tracking/05_version_metrics.md @@ -1,4 +1,4 @@ -# Version metrics +# Track metrics ## What is metric tracking? diff --git a/docs/source/10_experiment_tracking/06_mlflow_ui.md b/docs/source/03_experiment_tracking/01_experiment_tracking/06_mlflow_ui.md similarity index 97% rename from docs/source/10_experiment_tracking/06_mlflow_ui.md rename to docs/source/03_experiment_tracking/01_experiment_tracking/06_mlflow_ui.md index 2dd6ec2b..92711d65 100644 --- a/docs/source/10_experiment_tracking/06_mlflow_ui.md +++ b/docs/source/03_experiment_tracking/01_experiment_tracking/06_mlflow_ui.md @@ -1,4 +1,4 @@ -# Opening the UI +# Open the mlflow UI ## The mlflow user interface diff --git a/docs/source/11_interactive_use/01_notebook_use.md b/docs/source/03_experiment_tracking/02_interactive_use/01_notebook_use.md similarity index 95% rename from docs/source/11_interactive_use/01_notebook_use.md rename to docs/source/03_experiment_tracking/02_interactive_use/01_notebook_use.md index 8051e40c..6a2d2864 100644 --- a/docs/source/11_interactive_use/01_notebook_use.md +++ b/docs/source/03_experiment_tracking/02_interactive_use/01_notebook_use.md @@ -65,7 +65,7 @@ I suggest to : - **transition quickly to kedro pipelines**. For instance, when you preprocessing is roughly defined, try to put it in kedro pipelines. You can then use notebooks to experiment / perfom hyperparameter tuning while keeping preprocessing "fixed" to enhance reproducibility. 
You can run this pipeline interactively with : ```python -res = session.run( +result = session.run( pipeline_name="my_preprocessing_pipeline", tags="training", from_inputs="data_2", @@ -73,4 +73,4 @@ res = session.run( ) ``` -``res`` is a python dict with the outputs of your pipeline (e.g. a "preprocessed_data" ``pandas.DataFrame``), and you can use it interactively in your notebook. +``result`` is a python `dict` with the outputs of your pipeline (e.g. a "preprocessed_data" ``pandas.DataFrame``), and you can use it interactively in your notebook. diff --git a/docs/source/03_experiment_tracking/index.md b/docs/source/03_experiment_tracking/index.md new file mode 100644 index 00000000..c45ae433 --- /dev/null +++ b/docs/source/03_experiment_tracking/index.md @@ -0,0 +1,23 @@ +# {octicon}`beaker` Experiment tracking + +```{toctree} +:caption: Experiment tracking + +01_experiment_tracking/01_configuration +01_experiment_tracking/02_version_parameters +01_experiment_tracking/03_version_datasets +01_experiment_tracking/04_version_models +01_experiment_tracking/05_version_metrics +``` + +```{toctree} +:caption: Visualise experiments + +01_experiment_tracking/06_mlflow_ui +``` + +```{toctree} +:caption: Interactive use + +02_interactive_use/01_notebook_use +``` diff --git a/docs/source/03_quickstart/00_intro_tutorial.md b/docs/source/03_quickstart/00_intro_tutorial.md deleted file mode 100644 index 2c1fa881..00000000 --- a/docs/source/03_quickstart/00_intro_tutorial.md +++ /dev/null @@ -1,5 +0,0 @@ -# Goal of the tutorial - -This "Getting started" section demonstrates how to use some basic functionalities of `kedro-mlflow` in an end to end example. It is supposed to be simple and self-contained and is partially redundant with other sections, but far from complete. - -The section only focuses on the versioning part and does not show the "machine learning framework" abilities of the plugin. 
The goal is to give to a new user a quick glance to some capabiltiies so that he can decide whether the plugin suits its needs or not. It is totally worth checking the other sections to have a much more complete overview of what this plugin provides. diff --git a/docs/source/03_quickstart/index.rst b/docs/source/03_quickstart/index.rst deleted file mode 100644 index 90ad871d..00000000 --- a/docs/source/03_quickstart/index.rst +++ /dev/null @@ -1,9 +0,0 @@ -Introduction -============ - -.. toctree:: - :maxdepth: 4 - - Goal of the tutorial <00_intro_tutorial.md> - Create an example project <01_example_project.md> - First steps with ``kedro-mlflow`` <02_first_steps.md> diff --git a/docs/source/21_pipeline_serving/01_mlflow_models.md b/docs/source/04_pipeline_as_model/01_pipeline_as_custom_model/01_mlflow_models.md similarity index 79% rename from docs/source/21_pipeline_serving/01_mlflow_models.md rename to docs/source/04_pipeline_as_model/01_pipeline_as_custom_model/01_mlflow_models.md index f805cdc7..3b51398f 100644 --- a/docs/source/21_pipeline_serving/01_mlflow_models.md +++ b/docs/source/04_pipeline_as_model/01_pipeline_as_custom_model/01_mlflow_models.md @@ -1,6 +1,6 @@ -# Pipeline serving with kedro-mlflow +# Introduction to mlflow models -## Introduction to Mlflow Models +## What are Mlflow Models ? [Mlflow Models are a standardised agnostic format to store machine learning models](https://www.mlflow.org/docs/latest/models.html). They intend to be standalone to be as portable as possible to be deployed virtually anywhere and mlflow provides built-in CLI commands to deploy a mlflow model to most common cloud platforms or to create an API. @@ -11,16 +11,18 @@ A Mlflow Model is composed of: - a ``model.pkl`` (or a ``python_function.pkl`` for custom model) file containing the trained model. 
- an ``artifacts`` folder containing all other data necessary to execute the models -Mlflow enable to create custom models "flavors" to convert any object to a Mlflow Model providing we have these informations. Inside a Kedro prpojects, the ``Pipeline`` and ``DataCatalog`` objects contains all these informations: as a consequence, it is easy to create a custom model to convert entire Kedro ``Pipeline``s to mlflow models. +```{important} +Mlflow enables to create **custom models "flavors" to convert any object to a Mlflow Model** provided we have these informations. Inside a Kedro project, the ``Pipeline`` and ``DataCatalog`` objects contain all these informations. As a consequence, it is easy to create a custom model to convert entire Kedro ``Pipeline``s to mlflow models, and it is the purpose of ``pipeline_ml_factory`` and ``KedroPipelineModel`` that we will present in the following sections. +``` -## Pre-requisite for serving a pipeline +## Pre-requisite for converting a pipeline to a mlflow model You can log any Kedro ``Pipeline`` matching the following requirements: - one of its input must be a ``pandas.DataFrame``, a ``spark.DataFrame`` or a ``numpy.array``. This is the **input which contains the data to predict on**. This can be any Kedro ``AbstractDataset`` which loads data in one of the previous three formats. It can also be a ``MemoryDataset`` and not be persisted in the ``catalog.yml``. - all its other inputs must be persisted on disk (e.g. if the machine learning model must already be trained and saved so we can export it) or declared as "parameters" in the model ``Signature``. -```{note} +```{warning} If the pipeline has parameters : - For ``mlflow<2.7.0`` the parameters need to be persisted before exporting the model, which implies that you will not be able to modify them at runtime. This is a limitation of ``mlflow<2.6.0`` - For ``mlflow>=2.7.0`` , they can be declared in the signature and modified at runtime.
See https://github.com/Galileo-Galilei/kedro-mlflow/issues/445 for more information. diff --git a/docs/source/21_pipeline_serving/02_scikit_learn_like_pipeline.md b/docs/source/04_pipeline_as_model/01_pipeline_as_custom_model/02_scikit_learn_like_pipeline.md similarity index 95% rename from docs/source/21_pipeline_serving/02_scikit_learn_like_pipeline.md rename to docs/source/04_pipeline_as_model/01_pipeline_as_custom_model/02_scikit_learn_like_pipeline.md index e9b23e89..cdc8897e 100644 --- a/docs/source/21_pipeline_serving/02_scikit_learn_like_pipeline.md +++ b/docs/source/04_pipeline_as_model/01_pipeline_as_custom_model/02_scikit_learn_like_pipeline.md @@ -62,7 +62,7 @@ You can configure your project as follows: kedro run --pipeline=training ``` - **The inference pipeline will _automagically_ be logged as a custom mlflow model"** (a ``KedroPipelineModel``) **at the end of the training pipeline!**. + **The inference pipeline will _automagically_ be logged as a custom mlflow model** (a ``KedroPipelineModel``) **at the end of the training pipeline!**. 5. Go to the UI, retrieve the run id of your "inference pipeline" model and use it as you want, e.g. in the `catalog.yml`: @@ -108,10 +108,6 @@ A step by step tutorial with code is available in the [kedro-mlflow-tutorial rep You have also other resources to understand the rationale: -- an explanation of the [``PipelineML`` class in the python objects section](../07_python_objects/03_Pipelines.md) +- an explanation of the [``PipelineML`` class in the python objects section](../../05_API/01_python_objects/03_Pipelines.html) - detailed explanations [on this issue](https://github.com/Galileo-Galilei/kedro-mlflow/issues/16) and [this discussion](https://github.com/Galileo-Galilei/kedro-mlflow/discussions/229). - an example of use in a user project [in this repo](https://github.com/laurids-reichardt/kedro-examples/blob/kedro-mlflow-hotfix2/text-classification/src/text_classification/pipelines/pipeline.py). 
- -## Motivation - -You can find more about the motivations in . diff --git a/docs/source/04_pipeline_as_model/01_pipeline_as_custom_model/03_deployment_patterns.md b/docs/source/04_pipeline_as_model/01_pipeline_as_custom_model/03_deployment_patterns.md new file mode 100644 index 00000000..18adbffb --- /dev/null +++ b/docs/source/04_pipeline_as_model/01_pipeline_as_custom_model/03_deployment_patterns.md @@ -0,0 +1,149 @@ +# Deployment patterns for kedro pipelines as model + +A step by step tutorial with code is available in the [kedro-mlflow-tutorial repository on github](https://github.com/Galileo-Galilei/kedro-mlflow-tutorial#serve-the-inference-pipeline-to-an-end-user) which explains how to serve the pipeline as an API or a batch. + +## Deploying a KedroPipelineModel + +::::{tab-set} + +:::{tab-item} Reuse from a python script + +```{note} +See tutorial: +``` + +If you want to load and predict with your model from python, the ``load_model`` function of mlflow is what you need: + +```python +PROJECT_PATH = r"" +RUN_ID = "" + +from kedro.framework.startup import bootstrap_project +from kedro.framework.session import KedroSession +from mlflow.pyfunc import load_model + +bootstrap_project(PROJECT_PATH) +session = KedroSession.create( + session_id=1, + project_path=PROJECT_PATH, + package_name="kedro_mlflow_tutorial", +) +local_context = session.load_context() # setup mlflow config + +instances = local_context.io.load("instances") +model = load_model(f"runs:/{RUN_ID}/kedro_mlflow_tutorial") + +predictions = model.predict( + instances +) # runs ``session.run(pipeline=inference)`` with the artifacts created during training. You should see the kedro logs. +``` + +The ``predictions`` object is a ``pandas.DataFrame`` and can be handled as usual. +::: + +:::{tab-item} Reuse in a kedro pipeline + +```{note} +See tutorial: +``` + +Say that you want to reuse this trained model in a kedro Pipeline, like the user_app.
The easiest way to do it is to add the model in the catalog.yml file + +```yaml +pipeline_inference_model: + type: kedro_mlflow.io.models.MlflowModelLoggerDataSet + flavor: mlflow.pyfunc + pyfunc_workflow: python_model + artifact_path: kedro_mlflow_tutorial # the name of your mlflow folder = the model_name in pipeline_ml_factory + run_id: # put it in globals.yml to help people find out what to modify +``` + +Then you can reuse it in a node to predict with this model which is the entire inference pipeline at the time you launched the training. + +```python +# nodes.py +def predict_from_model(model, data): + return model.predict(data) + + +# pipeline.py +def create_pipeline(): + return pipeline( + [ + node( + func=predict_from_model, + inputs={"model": pipeline_inference_model, "data": "validation_data"}, + ) + ] + ) +``` + +::: + +:::{tab-item} Serve the model with mlflow + +```{note} +See tutorial: +``` + +Mlflow provide helpers to serve the model as an API with one line of code: + +``mlflow models serve -m "runs://kedro_mlflow_tutorial"`` + +This will serve your model as an API (beware: there are known issues on windows). You can test it with: +``curl -d "{\"columns\":[\"text\"],\"index\":[0,1],\"data\":[[\"This movie is cool\"],[\"awful film\"]]}" -H "Content-Type: application/json" localhost:5000/invocations`` +::: + +:::: + +## Frequently asked questions + +:::{dropdown} How can I pass parameters at runtime to a ``KedroPipelineModel``? + +Since ``kedro-mlflow>0.14.0``, you can pass parameters when predicting with a ``KedroPipelineModel`` object. + +We assume you've trained a model with ``pipeline_factory_function``. First, load the model, e.g. 
through the catalog or as described in the previous section: + +```yaml +# catalog.yml +pipeline_inference_model: + type: kedro_mlflow.io.models.MlflowModelTrackingDataset + flavor: mlflow.pyfunc + pyfunc_workflow: python_model + artifact_path: kedro_mlflow_tutorial # the name of your mlflow folder = the model_name in pipeline_ml_factory + run_id: +``` + +Then, pass params as a dict under the ``params`` argument of the ``predict`` method: + +```python +catalog.load("pipeline_inference_model") # You can also load it in a node "as usual" +predictions = model.predict(input_data, params={"my_param": ""}) +``` + +```{warning} +This will only work if ``my_param`` is a parameter (i.e. prefixed with ``params:``) of the inference pipeline. +``` + +```{tip} +Available params are visible in the model signature in the UI +``` + +::: + +:::{dropdown} How can I change the runner at runtime when predicting with a ``KedroPipelineModel``? + +Assuming the syntax of previous section, a special key in "params" is reserved for the kedro runner: + +```python +catalog.load("pipeline_inference_model") +predictions = model.predict( + input_data, params={"my_param": "", "runner": "ThreadRunner"} +) +``` + +```{tip} +You can pass any kedro runner, or even a custom runner by using the path to the module: ``params={"runner": "my_package.my_module.MyRunner"}`` +``` + +::: diff --git a/docs/source/21_pipeline_serving/04_custom_kedro_pipeline_model.md b/docs/source/04_pipeline_as_model/01_pipeline_as_custom_model/04_custom_kedro_pipeline_model.md similarity index 100% rename from docs/source/21_pipeline_serving/04_custom_kedro_pipeline_model.md rename to docs/source/04_pipeline_as_model/01_pipeline_as_custom_model/04_custom_kedro_pipeline_model.md diff --git a/docs/source/22_framework_ml/01_why_framework.md b/docs/source/04_pipeline_as_model/02_framework_ml/01_why_framework.md similarity index 95% rename from docs/source/22_framework_ml/01_why_framework.md rename to 
docs/source/04_pipeline_as_model/02_framework_ml/01_why_framework.md index 6967f783..c225be71 100644 --- a/docs/source/22_framework_ml/01_why_framework.md +++ b/docs/source/04_pipeline_as_model/02_framework_ml/01_why_framework.md @@ -112,7 +112,7 @@ As stated previous paragraph, the inference pipeline is not a primary concern wh - in the best case, you have trained the model from a git sha which is logged in mlflow. Any potential user can (but it takes time) recreate the exact inference pipeline from your source code, and retrieve all necessary artifacts from mlflow. This is tedious, error prone, and gives a lot of responsibility and work to your end user, but at least it makes your model usable. - most likely, you did not train your model from a version control commit. While experimenting /debug, it is very common to modify the code and retrain without committing. The exact code associated to a given model will likely be impossible to find out later. -> `kedro-mlflow` offers a `PipelineML` (and its helpers `pipeline_ml_factory`) class which binds the `training` and `inference` pipeline (similarly to ``scikit-learn`` ``Pipeline`` object), and a hook which autolog such pipelines when they are run. This enables data scientists to ensure that each training model is logged with its associated inference pipeline, and is ready to use for any end user. This decreases a lot the necessary cognitive complexity to ensure coherence between training and inference. +> `kedro-mlflow` offers a `PipelineML` (and its helper `pipeline_ml_factory`) class which binds the `training` and `inference` pipeline (similarly to ``scikit-learn`` ``Pipeline`` object), and a hook which autolog such pipelines when they are run. This enables data scientists to ensure that each training model is logged with its associated inference pipeline, and is ready to use for any end user. This decreases a lot the necessary cognitive complexity to ensure coherence between training and inference. 
### Issue 4: Data scientists do not handle business objects @@ -132,7 +132,7 @@ Your model must handle business objects (e.g. a mail, a movie review, a customer ``kedro-mlflow`` assume that we declare a clear contrat of what the output of the data science project is: it is an an inference pipeline. This defines a clear "definition of done" of the data science project: is it ready to deploy? -The downside of such an approach is that it increases data scientist's responsibilities,because s(he) is responsible for his code. +The downside of such an approach is that it increases data scientist's responsibilities, because s(he) is responsible for his code. ``kedro-mlflow`` offers a very convenient way (through the ``pipeline_ml_factory`` function) to make sure that each experiment will result in creating a compliant "output". diff --git a/docs/source/22_framework_ml/02_ml_project_components.md b/docs/source/04_pipeline_as_model/02_framework_ml/02_ml_project_components.md similarity index 99% rename from docs/source/22_framework_ml/02_ml_project_components.md rename to docs/source/04_pipeline_as_model/02_framework_ml/02_ml_project_components.md index 0bba035e..4ebb6187 100644 --- a/docs/source/22_framework_ml/02_ml_project_components.md +++ b/docs/source/04_pipeline_as_model/02_framework_ml/02_ml_project_components.md @@ -27,7 +27,7 @@ Note that there are **as many _etl_app_ and _user_app_** as needed for the diffe We saw that the data scientist has to create some code that will be replaced by other people code when deploying the model. As a consequence, the interactions between these apps must be very clearly defined at the beginning of the project. We claim that it is possible to cover most use case with the following schema: -![apps_interaction](../imgs/apps_interaction.png) +![apps_interaction](../../imgs/apps_interaction.png) The *ml_app* takes `instances` (i.e. examples of the business object to handle) as input. 
This implies that the *ml_app* will include some machine learning-specific preprocessing and not only the model training. It also (optionally) takes labels as inputs if the underlying problem is supervised. Even in this situation, the labels will not be known at inference time so the *etl_app* does not necessarily produce them. diff --git a/docs/source/22_framework_ml/03_framework_solutions.md b/docs/source/04_pipeline_as_model/02_framework_ml/03_framework_solutions.md similarity index 97% rename from docs/source/22_framework_ml/03_framework_solutions.md rename to docs/source/04_pipeline_as_model/02_framework_ml/03_framework_solutions.md index b3ed92c9..ab18c30b 100644 --- a/docs/source/22_framework_ml/03_framework_solutions.md +++ b/docs/source/04_pipeline_as_model/02_framework_ml/03_framework_solutions.md @@ -2,7 +2,7 @@ ## Reminder -We assume that we want to solve the following challenges among those described in ["Why we need a mlops framework"](./01_why_framework.md) section: +We assume that we want to solve the following challenges among those described in ["Why we need a mlops framework"](./01_why_framework.html) section: - serve pipelines (which handles business objects) instead of models - synchronize training and inference by packaging inference pipeline at training time @@ -15,7 +15,7 @@ To solve the problem of desynchronization between training and inference, ``kedr This class implements several methods to compare the ``DataCatalog``s associated to each of the two binded pipelines and performs subsetting oparations. This makes it quite difficult to handle directly. Fortunately, ``kedro-mlflow`` provides a convenient API to create ``PipelineML`` objects: the ``pipeline_ml_factory`` function. -The use of ``pipeline_ml_factory`` is very straightforward, especially if you have used the [project architecture described previously](./02_ml_project_components.md). 
The best place to create such an object is your `hooks.py` file which will look like this: +The use of ``pipeline_ml_factory`` is very straightforward, especially if you have used the [project architecture described previously](./02_ml_project_components.html). The best place to create such an object is your `hooks.py` file which will look like this: ```python # hooks.py diff --git a/docs/source/04_pipeline_as_model/index.md b/docs/source/04_pipeline_as_model/index.md new file mode 100644 index 00000000..37038c47 --- /dev/null +++ b/docs/source/04_pipeline_as_model/index.md @@ -0,0 +1,19 @@ + +# {octicon}`rocket` Pipeline as model + +```{toctree} +:caption: Pipeline as model + +01_pipeline_as_custom_model/01_mlflow_models +01_pipeline_as_custom_model/02_scikit_learn_like_pipeline +01_pipeline_as_custom_model/03_deployment_patterns +01_pipeline_as_custom_model/04_custom_kedro_pipeline_model +``` + +```{toctree} +:caption: kedro-mlflow as a mlops framework + +02_framework_ml/01_why_framework +02_framework_ml/02_ml_project_components +02_framework_ml/03_framework_solutions +``` diff --git a/docs/source/30_python_objects/01_DataSets.md b/docs/source/05_API/01_python_objects/01_Datasets.md similarity index 98% rename from docs/source/30_python_objects/01_DataSets.md rename to docs/source/05_API/01_python_objects/01_Datasets.md index 97bab481..6e471702 100644 --- a/docs/source/30_python_objects/01_DataSets.md +++ b/docs/source/05_API/01_python_objects/01_Datasets.md @@ -1,4 +1,4 @@ -# New ``Dataset``s +# ``Dataset``s ## ``MlflowArtifactDataset`` @@ -41,7 +41,7 @@ csv_dataset = MlflowArtifactDataset( csv_dataset.save(data=pd.DataFrame({"a": [1, 2], "b": [3, 4]})) ``` -## Metrics `DataSets` +## Metrics `Datasets` ### ``MlflowMetricDataset`` @@ -52,7 +52,7 @@ csv_dataset.save(data=pd.DataFrame({"a": [1, 2], "b": [3, 4]})) [The ``MlflowMetricHistoryDataset`` is documented 
here](https://kedro-mlflow.readthedocs.io/en/latest/source/04_experimentation_tracking/05_version_metrics.html#saving-a-single-float-as-a-metric-with-mlflowmetricdataset). -## Models `DataSets` +## Models `Datasets` ### ``MlflowModelTrackingDataset`` @@ -60,7 +60,7 @@ The ``MlflowModelTrackingDataset`` accepts the following arguments: - flavor (str): Built-in or custom MLflow model flavor module. Must be Python-importable. - run_id (Optional[str], optional): MLflow run ID to use to load the model from or save the model to. It plays the same role as "filepath" for standard mlflow datasets. Defaults to None. -- artifact_path (str, optional): the run relative path tothe model. +- artifact_path (str, optional): the run relative path to the model. - pyfunc_workflow (str, optional): Either `python_model` or `loader_module`.See [mlflow workflows](https://www.mlflow.org/docs/latest/python_api/mlflow.pyfunc.html#workflows). - load_args (Dict[str, Any], optional): Arguments to `load_model` function from specified `flavor`. Defaults to None. - save_args (Dict[str, Any], optional): Arguments to `log_model` function from specified `flavor`. Defaults to None. diff --git a/docs/source/30_python_objects/02_Hooks.md b/docs/source/05_API/01_python_objects/02_Hooks.md similarity index 91% rename from docs/source/30_python_objects/02_Hooks.md rename to docs/source/05_API/01_python_objects/02_Hooks.md index 29f89188..5a3ba17c 100644 --- a/docs/source/30_python_objects/02_Hooks.md +++ b/docs/source/05_API/01_python_objects/02_Hooks.md @@ -9,4 +9,4 @@ This hook : 1. manages mlflow settings at the beginning and the end of the run (run start / end). 2. autolog nodes parameters each time the pipeline is run (with ``kedro run`` or programatically). 3. log useful informations for reproducibility as ``mlflow tags`` (including kedro ``Journal`` information for old kedro versions and the commands used to launch the run). - 4. 
register the pipeline as a valid ``mlflow model`` if [it is a ``PipelineML`` instance](./03_Pipelines.md) + 4. register the pipeline as a valid ``mlflow model`` if [it is a ``PipelineML`` instance](./03_Pipelines.html) diff --git a/docs/source/30_python_objects/03_Pipelines.md b/docs/source/05_API/01_python_objects/03_Pipelines.md similarity index 100% rename from docs/source/30_python_objects/03_Pipelines.md rename to docs/source/05_API/01_python_objects/03_Pipelines.md diff --git a/docs/source/30_python_objects/04_CLI.md b/docs/source/05_API/01_python_objects/04_CLI.md similarity index 94% rename from docs/source/30_python_objects/04_CLI.md rename to docs/source/05_API/01_python_objects/04_CLI.md index e77e302d..6f6a5efc 100644 --- a/docs/source/30_python_objects/04_CLI.md +++ b/docs/source/05_API/01_python_objects/04_CLI.md @@ -4,7 +4,7 @@ ``kedro mlflow init``: this command is needed to initalize your project. You cannot run any other commands before you run this one once. It performs 2 actions: - creates a ``mlflow.yml`` configuration file in your ``conf/local`` folder - - replace the ``src/PYTHON_PACKAGE/run.py`` file by an updated version of the template. If your template has been modified since project creation, a warning will be raised. You can either run ``kedro mlflow init --force`` to ignore this warning (but this will erase your ``run.py``) or [set hooks manually](../02_installation/02_setup.md#declaring-kedro-mlflow-hooks). + - replace the ``src/PYTHON_PACKAGE/run.py`` file by an updated version of the template. If your template has been modified since project creation, a warning will be raised. You can either run ``kedro mlflow init --force`` to ignore this warning (but this will erase your ``run.py``) or [set hooks manually](../../02_getting_started/01_installation/02_setup.html). 
`init` has two arguments: @@ -15,7 +15,7 @@ ``kedro mlflow ui``: this command opens the mlflow UI (basically launches the ``mlflow ui`` command ) -`ui` accepts the port and host arguments of [``mlflow ui`` command](https://www.mlflow.org/docs/latest/cli.html#mlflow-ui). The default values used will be the ones defined in the [``mlflow.yml`` configuration file under the `ui`](../10_experimentation_tracking/01_configuration.md#configure-the-user-interface). +`ui` accepts the port and host arguments of [``mlflow ui`` command](https://www.mlflow.org/docs/latest/cli.html#mlflow-ui). The default values used will be the ones defined in the [``mlflow.yml`` configuration file under the `ui`](../../03_experiment_tracking/01_experiment_tracking/01_configuration.html). If you provide the arguments at runtime, they wil take priority over the ``mlflow.yml``, e.g. if you have: diff --git a/docs/source/30_python_objects/05_Configuration.md b/docs/source/05_API/01_python_objects/05_Configuration.md similarity index 100% rename from docs/source/30_python_objects/05_Configuration.md rename to docs/source/05_API/01_python_objects/05_Configuration.md diff --git a/docs/source/31_API/kedro_mlflow.config.rst b/docs/source/05_API/02_autoapi/kedro_mlflow.config.rst similarity index 100% rename from docs/source/31_API/kedro_mlflow.config.rst rename to docs/source/05_API/02_autoapi/kedro_mlflow.config.rst diff --git a/docs/source/31_API/kedro_mlflow.framework.cli.rst b/docs/source/05_API/02_autoapi/kedro_mlflow.framework.cli.rst similarity index 100% rename from docs/source/31_API/kedro_mlflow.framework.cli.rst rename to docs/source/05_API/02_autoapi/kedro_mlflow.framework.cli.rst diff --git a/docs/source/31_API/kedro_mlflow.framework.hooks.rst b/docs/source/05_API/02_autoapi/kedro_mlflow.framework.hooks.rst similarity index 100% rename from docs/source/31_API/kedro_mlflow.framework.hooks.rst rename to docs/source/05_API/02_autoapi/kedro_mlflow.framework.hooks.rst diff --git 
a/docs/source/31_API/kedro_mlflow.io.rst b/docs/source/05_API/02_autoapi/kedro_mlflow.io.rst similarity index 95% rename from docs/source/31_API/kedro_mlflow.io.rst rename to docs/source/05_API/02_autoapi/kedro_mlflow.io.rst index 61e2b33d..5f561037 100644 --- a/docs/source/31_API/kedro_mlflow.io.rst +++ b/docs/source/05_API/02_autoapi/kedro_mlflow.io.rst @@ -1,7 +1,7 @@ Datasets ================================== -Artifact DataSet +Artifact Dataset ----------------- .. automodule:: kedro_mlflow.io.artifacts.mlflow_artifact_dataset @@ -9,7 +9,7 @@ Artifact DataSet :undoc-members: :show-inheritance: -Metrics DataSet +Metrics Dataset ---------------- .. automodule:: kedro_mlflow.io.metrics.mlflow_metric_dataset @@ -28,7 +28,7 @@ Metrics DataSet :undoc-members: :show-inheritance: -Models DataSet +Models Dataset --------------- .. automodule:: kedro_mlflow.io.models.mlflow_abstract_model_dataset diff --git a/docs/source/31_API/kedro_mlflow.mlflow.rst b/docs/source/05_API/02_autoapi/kedro_mlflow.mlflow.rst similarity index 100% rename from docs/source/31_API/kedro_mlflow.mlflow.rst rename to docs/source/05_API/02_autoapi/kedro_mlflow.mlflow.rst diff --git a/docs/source/31_API/kedro_mlflow.pipeline.rst b/docs/source/05_API/02_autoapi/kedro_mlflow.pipeline.rst similarity index 100% rename from docs/source/31_API/kedro_mlflow.pipeline.rst rename to docs/source/05_API/02_autoapi/kedro_mlflow.pipeline.rst diff --git a/docs/source/31_API/kedro_mlflow.rst b/docs/source/05_API/02_autoapi/kedro_mlflow.rst similarity index 100% rename from docs/source/31_API/kedro_mlflow.rst rename to docs/source/05_API/02_autoapi/kedro_mlflow.rst diff --git a/docs/source/05_API/index.md b/docs/source/05_API/index.md new file mode 100644 index 00000000..c45d82fc --- /dev/null +++ b/docs/source/05_API/index.md @@ -0,0 +1,18 @@ + +# API + +```{toctree} +:caption: Python objects + +01_python_objects/01_Datasets +01_python_objects/02_Hooks +01_python_objects/03_Pipelines +01_python_objects/04_CLI 
+01_python_objects/05_Configuration +``` + +```{toctree} +:caption: API + +02_autoapi/kedro_mlflow +``` diff --git a/docs/source/10_experiment_tracking/index.rst b/docs/source/10_experiment_tracking/index.rst deleted file mode 100644 index 05031f8c..00000000 --- a/docs/source/10_experiment_tracking/index.rst +++ /dev/null @@ -1,12 +0,0 @@ -Introduction -============ - -.. toctree:: - :maxdepth: 4 - - Configure mlflow <01_configuration.md> - Version parameters <02_version_parameters.md> - Version datasets <03_version_datasets.md> - Version models <04_version_models.md> - Version metrics <05_version_metrics.md> - Open the User Interface <06_mlflow_ui.md> diff --git a/docs/source/11_interactive_use/index.rst b/docs/source/11_interactive_use/index.rst deleted file mode 100644 index 2c42da9d..00000000 --- a/docs/source/11_interactive_use/index.rst +++ /dev/null @@ -1,7 +0,0 @@ -Introduction -============ - -.. toctree:: - :maxdepth: 4 - - How to use in a notebook <01_notebook_use.md> diff --git a/docs/source/21_pipeline_serving/03_deployment_patterns.md b/docs/source/21_pipeline_serving/03_deployment_patterns.md deleted file mode 100644 index 1204b6cd..00000000 --- a/docs/source/21_pipeline_serving/03_deployment_patterns.md +++ /dev/null @@ -1,65 +0,0 @@ -# Deployment patterns for kedro pipelines - -A step by step tutorial with code is available in the [kedro-mlflow-tutorial repository on github](https://github.com/Galileo-Galilei/kedro-mlflow-tutorial#serve-the-inference-pipeline-to-an-end-user) which explains how to serve the pipeline as an API or a batch. 
- -## Deploying a KedroPipelineModel - -### Reuse from a python script - -See tutorial: https://github.com/Galileo-Galilei/kedro-mlflow-tutorial?tab=readme-ov-file#scenario-1-reuse-from-a-python-script - -### Reuse in a kedro pipeline - -See tutorial: https://github.com/Galileo-Galilei/kedro-mlflow-tutorial?tab=readme-ov-file#scenario-2-reuse-in-a-kedro-pipeline - -### Serve the model with mlflow - -See tutorial: https://github.com/Galileo-Galilei/kedro-mlflow-tutorial?tab=readme-ov-file#scenario-3-serve-the-model-with-mlflow - -## Pass parameters at runtime to a Kedro PipelineModel - -### Pipeline parameters - -Since ``kedro-mlflow>0.14.0``, you can pass parameters when predicting with a ``KedroPipelineModel`` object. - -We assume you've trained a model with ``pipeline_factory_function``. First, load the model, e.g. through the catalog or as described in the previous section: - -```yaml -# catalog.yml -pipeline_inference_model: - type: kedro_mlflow.io.models.MlflowModelTrackingDataset - flavor: mlflow.pyfunc - pyfunc_workflow: python_model - artifact_path: kedro_mlflow_tutorial # the name of your mlflow folder = the model_name in pipeline_ml_factory - run_id: -``` - -Then, pass params as a dict under the ``params`` argument of the ``predict`` method: - -```python -catalog.load("pipeline_inference_model") # You can also load it in a node "as usual" -predictions = model.predict(input_data, params={"my_param": ""}) -``` - -```{warning} -This will only work if ``my_param`` is a parameter (i.e. prefixed with ``params:``) of the inference pipeline. 
-``` - -```{tip} -Available params are visible in the model signature in the UI -``` - -### Configuring the runner - -Assuming the syntax of previous section, a special key in "params" is reserved for the kedro runner: - -```python -catalog.load("pipeline_inference_model") -predictions = model.predict( - input_data, params={"my_param": "", "runner": "ThreadRunner"} -) -``` - -```{tip} -You can pass any kedro runner, or even a custom runner by using the path to the module: ``params={"runner": "my_package.my_module.MyRunner"}`` -``` diff --git a/docs/source/21_pipeline_serving/index.rst b/docs/source/21_pipeline_serving/index.rst deleted file mode 100644 index d21e490b..00000000 --- a/docs/source/21_pipeline_serving/index.rst +++ /dev/null @@ -1,10 +0,0 @@ -Introduction -============ - -.. toctree:: - :maxdepth: 4 - - Reminder on Mlflow Models <01_mlflow_models.md> - Scikit-learn like kedro pipelines with ``KedroPipelineModel`` <02_scikit_learn_like_pipeline.md> - Deployments patterns for ``KedroPipelineModel`` models <03_deployment_patterns.md> - Advanced logging for ``KedroPipelineModel`` <04_custom_kedro_pipeline_model.md> diff --git a/docs/source/22_framework_ml/index.rst b/docs/source/22_framework_ml/index.rst deleted file mode 100644 index dfc0e02f..00000000 --- a/docs/source/22_framework_ml/index.rst +++ /dev/null @@ -1,9 +0,0 @@ -Introduction -============ - -.. toctree:: - :maxdepth: 4 - - Why we need a mlops framework for development lifecycle <01_why_framework.md> - The architecture of a machine learning project <02_ml_project_components.md> - A framework for training / inference synchronization <03_framework_solutions.md> diff --git a/docs/source/30_python_objects/index.rst b/docs/source/30_python_objects/index.rst deleted file mode 100644 index d6708b74..00000000 --- a/docs/source/30_python_objects/index.rst +++ /dev/null @@ -1,11 +0,0 @@ -Introduction -============ - -.. 
toctree:: - :maxdepth: 4 - - DataSets <01_DataSets.md> - Hooks <02_Hooks.md> - Pipelines <03_Pipelines.md> - CLI <04_CLI.md> - Configuration <05_Configuration.md> diff --git a/docs/source/imgs/logo.png b/docs/source/imgs/logo.png new file mode 100644 index 00000000..4194197a Binary files /dev/null and b/docs/source/imgs/logo.png differ diff --git a/setup.py b/setup.py index 3123cb7d..029a4c4c 100644 --- a/setup.py +++ b/setup.py @@ -39,11 +39,12 @@ def _parse_requirements(path, encoding="utf-8"): extras_require={ "doc": [ "sphinx>=4.5.0,<9.0.0", - "sphinx_rtd_theme>=1.0,<3.1", "sphinx-markdown-tables~=0.0.15", "sphinx-click>=3.1,<6.1", "sphinx_copybutton~=0.5.0", "myst-parser>=0.17.2,<4.1.0", + "sphinx_design>=0.6.0,<0.7.0", + "pydata-sphinx-theme>=0.16.0,<0.17.0", ], "test": [ "pytest>=5.4.0, <9.0.0", # pytest==8.0.0 breaks pytest-lazy-fixture : https://github.com/TvoroG/pytest-lazy-fixture/issues/65