Commit 4c3d026 (1 parent: b50d12f)

chore: documentation for release

Signed-off-by: Naren Dasan <naren@narendasan.com>
Signed-off-by: Naren Dasan <narens@nvidia.com>

File tree: 340 files changed (+148,758 −10 lines)

Changed files (diffs this large are not rendered inline; added line counts shown):

docs/v2.2.0/.nojekyll — whitespace-only changes
docs/v2.2.0/_cpp_api/classtorch__tensorrt_1_1DataType.html (+942)
docs/v2.2.0/_cpp_api/classtorch__tensorrt_1_1Device_1_1DeviceType.html (+879)
docs/v2.2.0/_cpp_api/classtorch__tensorrt_1_1TensorFormat.html (+911)
docs/v2.2.0/_cpp_api/classtorch__tensorrt_1_1ptq_1_1Int8CacheCalibrator.html (+875)
docs/v2.2.0/_cpp_api/classtorch__tensorrt_1_1ptq_1_1Int8Calibrator.html (+885)
docs/v2.2.0/_cpp_api/define_macros_8h_1a18d295a837ac71add5578860b55e5502.html (+770)
docs/v2.2.0/_cpp_api/define_macros_8h_1a282fd3c0b1c3a215148ae372070e1268.html (+770)
docs/v2.2.0/_cpp_api/define_macros_8h_1a31398a6d4d27e28817afb0f0139e909e.html (+770)
docs/v2.2.0/_cpp_api/define_macros_8h_1a35703561b26b1a9d2738ad7d58b27827.html (+770)
docs/v2.2.0/_cpp_api/define_macros_8h_1abd1465eb38256d3f22cc1426b23d516b.html (+770)
docs/v2.2.0/_cpp_api/define_macros_8h_1abe87b341f562fd1cf40b7672e4d759da.html (+770)
docs/v2.2.0/_cpp_api/define_macros_8h_1ad19939408f7be171a74a89928b36eb59.html (+770)
docs/v2.2.0/_cpp_api/define_macros_8h_1adad592a7b1b7eed529cdf6acd584c883.html (+770)
docs/v2.2.0/_cpp_api/dir_cpp.html (+753)
docs/v2.2.0/_cpp_api/dir_cpp_include.html (+754)
docs/v2.2.0/_cpp_api/dir_cpp_include_torch_tensorrt.html (+757)
docs/v2.2.0/_cpp_api/enum_logging_8h_1a130f65408ad8cbaee060f05e8db69558.html (+808)
docs/v2.2.0/_cpp_api/enum_torch__tensorrt_8h_1a3fbe5d72e4fc624dbd038853079620eb.html (+787)
docs/v2.2.0/_cpp_api/file_cpp_include_torch_tensorrt_logging.h.html (+809)
docs/v2.2.0/_cpp_api/file_cpp_include_torch_tensorrt_macros.h.html (+795)
docs/v2.2.0/_cpp_api/file_cpp_include_torch_tensorrt_ptq.h.html (+806)
docs/v2.2.0/_cpp_api/file_cpp_include_torch_tensorrt_torch_tensorrt.h.html (+821)
docs/v2.2.0/_cpp_api/function_logging_8h_1a0593f776f469c20469e2f729fc7861a3.html (+770)
docs/v2.2.0/_cpp_api/function_logging_8h_1a0c012cb374addd90eb1f42eaec570650.html (+776)
docs/v2.2.0/_cpp_api/function_logging_8h_1a56e110feaaba2c3fd44bd201fd21a76a.html (+776)
docs/v2.2.0/_cpp_api/function_logging_8h_1a7cb50492421ea9de4e3db895819df6f2.html (+776)
docs/v2.2.0/_cpp_api/function_logging_8h_1ac46ac0901cb97e3ae6e93b45f24e90b8.html (+779)
docs/v2.2.0/_cpp_api/function_logging_8h_1ad2efd47b6c3689e58ccc595680579ae5.html (+776)
docs/v2.2.0/_cpp_api/function_logging_8h_1af8f3443813315af7901903d25dd495cc.html (+770)
docs/v2.2.0/_cpp_api/function_ptq_8h_1a226e3c83379d1012cde8578c1c86b16c.html (+785)
docs/v2.2.0/_cpp_api/function_ptq_8h_1a6186e305f47c1d94b6130ef6c7f7e178.html (+791)
docs/v2.2.0/_cpp_api/function_torch__tensorrt_8h_1a5b405fd3bf3c8fc2e2a54cbbab979797.html (+785)
docs/v2.2.0/_cpp_api/function_torch__tensorrt_8h_1a6e19490a08fb1553c9dd347a5ae79db9.html (+785)
docs/v2.2.0/_cpp_api/function_torch__tensorrt_8h_1a81f9783517335dda877d8cfcf38987c9.html (+791)
docs/v2.2.0/_cpp_api/function_torch__tensorrt_8h_1ac4ab8313ae72c2c899ea31548b528528.html (+776)
docs/v2.2.0/_cpp_api/function_torch__tensorrt_8h_1ad1acd06eaeaffbbcf6e7ebf426891384.html (+776)
docs/v2.2.0/_cpp_api/function_torch__tensorrt_8h_1ad6a4ee8ca6c8f6e5519eb1128ec7f4a1.html (+771)
docs/v2.2.0/_cpp_api/function_torch__tensorrt_8h_1ae8d56472106eeef37fbe51ff7f40c9b2.html (+785)
docs/v2.2.0/_cpp_api/namespace_torch.html (+756)
docs/v2.2.0/_cpp_api/namespace_torch_tensorrt.html (+804)
docs/v2.2.0/_cpp_api/namespace_torch_tensorrt__logging.html (+785)
docs/v2.2.0/_cpp_api/namespace_torch_tensorrt__ptq.html (+781)
docs/v2.2.0/_cpp_api/namespace_torch_tensorrt__torchscript.html (+782)
docs/v2.2.0/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_logging.h.html (+785)
docs/v2.2.0/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_macros.h.html (+784)
docs/v2.2.0/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_ptq.h.html (+924)
docs/v2.2.0/_cpp_api/program_listing_file_cpp_include_torch_tensorrt_torch_tensorrt.h.html (+1,088)
docs/v2.2.0/_cpp_api/structtorch__tensorrt_1_1Device.html (+927)
docs/v2.2.0/_cpp_api/structtorch__tensorrt_1_1GraphInputs.html (+785)
docs/v2.2.0/_cpp_api/structtorch__tensorrt_1_1Input.html (+1,106)
docs/v2.2.0/_cpp_api/structtorch__tensorrt_1_1torchscript_1_1CompileSpec.html (+945)
docs/v2.2.0/_cpp_api/torch_tensort_cpp.html (+1,152)
docs/v2.2.0/_cpp_api/unabridged_orphan.html (+842)
@@ -0,0 +1,158 @@ (new file — Jupyter notebook: "Compiling ResNet using the Torch-TensorRT torch.compile Backend")

{
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n\n# Compiling ResNet using the Torch-TensorRT `torch.compile` Backend\n\nThis interactive script is intended as a sample of the Torch-TensorRT workflow with `torch.compile` on a ResNet model.\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Imports and Model Definition\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "import torch\nimport torch_tensorrt\nimport torchvision.models as models"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Initialize model with half precision and sample inputs\nmodel = models.resnet18(pretrained=True).half().eval().to(\"cuda\")\ninputs = [torch.randn((1, 3, 224, 224)).to(\"cuda\").half()]"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Optional Input Arguments to `torch_tensorrt.compile`\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Enabled precision for TensorRT optimization\nenabled_precisions = {torch.half}\n\n# Whether to print verbose logs\ndebug = True\n\n# Workspace size for TensorRT\nworkspace_size = 20 << 30\n\n# Maximum number of TRT Engines\n# (Lower value allows more graph segmentation)\nmin_block_size = 7\n\n# Operations to Run in Torch, regardless of converter support\ntorch_executed_ops = {}"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Compilation with `torch_tensorrt.compile`\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Build and compile the model with torch.compile, using Torch-TensorRT backend\noptimized_model = torch_tensorrt.compile(\n    model,\n    ir=\"torch_compile\",\n    inputs=inputs,\n    enabled_precisions=enabled_precisions,\n    debug=debug,\n    workspace_size=workspace_size,\n    min_block_size=min_block_size,\n    torch_executed_ops=torch_executed_ops,\n)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "Equivalently, we could have run the above via the torch.compile frontend, as so:\n`optimized_model = torch.compile(model, backend=\"torch_tensorrt\", options={\"enabled_precisions\": enabled_precisions, ...}); optimized_model(*inputs)`\n\n"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Inference\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Does not cause recompilation (same batch size as input)\nnew_inputs = [torch.randn((1, 3, 224, 224)).half().to(\"cuda\")]\nnew_outputs = optimized_model(*new_inputs)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Does cause recompilation (new batch size)\nnew_batch_size_inputs = [torch.randn((8, 3, 224, 224)).half().to(\"cuda\")]\nnew_batch_size_outputs = optimized_model(*new_batch_size_inputs)"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Cleanup\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Finally, we use Torch utilities to clean up the workspace\ntorch._dynamo.reset()"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "## Cuda Driver Error Note\n\nOccasionally, upon exiting the Python runtime after Dynamo compilation with `torch_tensorrt`,\none may encounter a Cuda Driver Error. This issue is related to https://github.com/NVIDIA/TensorRT/issues/2052\nand can be resolved by wrapping the compilation/inference in a function and using a scoped call, as in::\n\n    if __name__ == '__main__':\n        compile_engine_and_infer()\n\n"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.10.12"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
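The notebook's `min_block_size` comment notes that a lower value allows more graph segmentation: supported-op runs shorter than the threshold stay in Torch rather than becoming tiny TensorRT engines. As a rough illustration of that idea only, here is a pure-Python sketch of partitioning a linear op sequence; the function and op labels are hypothetical and do not reflect Torch-TensorRT's actual partitioner.

```python
def partition(ops, supported, min_block_size):
    """Group a linear op sequence into accelerated/fallback blocks.

    Supported-op runs shorter than min_block_size are demoted back
    to fallback, mimicking how a partitioner avoids creating tiny
    accelerated engines.
    """
    blocks = []
    for op in ops:
        kind = "trt" if op in supported else "torch"
        if blocks and blocks[-1][0] == kind:
            blocks[-1][1].append(op)  # extend the current run
        else:
            blocks.append((kind, [op]))  # start a new run
    # Demote short supported runs back to fallback
    return [
        ("torch", run) if kind == "trt" and len(run) < min_block_size
        else (kind, run)
        for kind, run in blocks
    ]


# The lone supported "conv" before "sub" is too short a run to
# justify an engine, so it is demoted to fallback:
print(partition(["conv", "sub", "conv", "relu"], {"conv", "relu"}, 2))
# [('torch', ['conv']), ('torch', ['sub']), ('trt', ['conv', 'relu'])]
```

With `min_block_size = 7`, as in the notebook, only runs of at least seven convertible ops would become TensorRT engines; everything shorter falls back to Torch.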
@@ -0,0 +1,107 @@ (new file — Python example: torch_compile_advanced_usage)

"""
.. _torch_compile_advanced_usage:

Torch Compile Advanced Usage
======================================================

This interactive script is intended as an overview of the process by which `torch_tensorrt.compile(..., ir="torch_compile", ...)` works, and how it integrates with the `torch.compile` API."""

# %%
# Imports and Model Definition
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

import torch
import torch_tensorrt

# %%


# We begin by defining a model
class Model(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.relu = torch.nn.ReLU()

    def forward(self, x: torch.Tensor, y: torch.Tensor):
        x_out = self.relu(x)
        y_out = self.relu(y)
        x_y_out = x_out + y_out
        return torch.mean(x_y_out)


# %%
# Compilation with `torch.compile` Using Default Settings
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

# Define sample float inputs and initialize model
sample_inputs = [torch.rand((5, 7)).cuda(), torch.rand((5, 7)).cuda()]
model = Model().eval().cuda()

# %%

# Next, we compile the model using torch.compile
# For the default settings, we can simply call torch.compile
# with the backend "torch_tensorrt", and run the model on an
# input to cause compilation, as so:
optimized_model = torch.compile(model, backend="torch_tensorrt", dynamic=False)
optimized_model(*sample_inputs)

# %%
# Compilation with `torch.compile` Using Custom Settings
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

# First, we use Torch utilities to clean up the workspace
# after the previous compile invocation
torch._dynamo.reset()

# Define sample half inputs and initialize model
sample_inputs_half = [
    torch.rand((5, 7)).half().cuda(),
    torch.rand((5, 7)).half().cuda(),
]
model_half = Model().eval().cuda()

# %%

# If we want to customize certain options in the backend,
# but still use the torch.compile call directly, we can provide
# custom options to the backend via the "options" keyword
# which takes in a dictionary mapping options to values.
#
# For accepted backend options, see the CompilationSettings dataclass:
# py/torch_tensorrt/dynamo/_settings.py
backend_kwargs = {
    "enabled_precisions": {torch.half},
    "debug": True,
    "min_block_size": 2,
    "torch_executed_ops": {"torch.ops.aten.sub.Tensor"},
    "optimization_level": 4,
    "use_python_runtime": False,
}

# Run the model on an input to cause compilation, as so:
optimized_model_custom = torch.compile(
    model_half,
    backend="torch_tensorrt",
    options=backend_kwargs,
    dynamic=False,
)
optimized_model_custom(*sample_inputs_half)

# %%
# Cleanup
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

# Finally, we use Torch utilities to clean up the workspace
torch._dynamo.reset()

# %%
# Cuda Driver Error Note
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
#
# Occasionally, upon exiting the Python runtime after Dynamo compilation with `torch_tensorrt`,
# one may encounter a Cuda Driver Error. This issue is related to https://github.com/NVIDIA/TensorRT/issues/2052
# and can be resolved by wrapping the compilation/inference in a function and using a scoped call, as in::
#
#     if __name__ == '__main__':
#         compile_engine_and_infer()
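The note above recommends moving compilation and inference into a function guarded by `if __name__ == '__main__':`, so that engine objects go out of scope before interpreter teardown. A minimal structural sketch of that pattern follows; the function body here is a placeholder, not the actual Torch-TensorRT calls.

```python
def compile_engine_and_infer():
    # Placeholder for the compile + inference steps shown above.
    # Keeping them inside a function scope means the compiled
    # engine and its outputs are released when the function
    # returns, before the Python runtime begins shutting down.
    engine = "compiled-engine"   # stand-in for the compiled model
    output = f"ran {engine}"     # stand-in for an inference call
    return output


if __name__ == "__main__":
    # All GPU-touching work happens inside the scoped call.
    print(compile_engine_and_infer())
```

The key point is structural: no compiled object is bound at module scope, so nothing CUDA-backed survives until interpreter exit.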
@@ -0,0 +1,55 @@ (new file — Python example: torch_compile_stable_diffusion)

"""
.. _torch_compile_stable_diffusion:

Torch Compile Stable Diffusion
======================================================

This interactive script is intended as a sample of the Torch-TensorRT workflow with `torch.compile` on a Stable Diffusion model. A sample output is featured below:

.. image:: /tutorials/images/majestic_castle.png
   :width: 512px
   :height: 512px
   :scale: 50 %
   :align: right
"""

# %%
# Imports and Model Definition
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

import torch
from diffusers import DiffusionPipeline

import torch_tensorrt

model_id = "CompVis/stable-diffusion-v1-4"
device = "cuda:0"

# Instantiate Stable Diffusion Pipeline with FP16 weights
pipe = DiffusionPipeline.from_pretrained(
    model_id, revision="fp16", torch_dtype=torch.float16
)
pipe = pipe.to(device)

backend = "torch_tensorrt"

# Optimize the UNet portion with Torch-TensorRT
pipe.unet = torch.compile(
    pipe.unet,
    backend=backend,
    options={
        "truncate_long_and_double": True,
        "precision": torch.float16,
    },
    dynamic=False,
)

# %%
# Inference
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

prompt = "a majestic castle in the clouds"
image = pipe(prompt).images[0]

image.save("images/majestic_castle.png")
image.show()
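The Stable Diffusion example compiles only `pipe.unet`, the compute-heavy denoiser, and leaves the rest of the pipeline untouched. The general pattern of swapping one attribute for a wrapped version can be sketched in plain Python; the pipeline, wrapper, and numbers below are stand-ins for illustration, not the diffusers or Torch-TensorRT APIs.

```python
class FakePipeline:
    """Stand-in for a multi-stage pipeline with one hot submodule."""

    def __init__(self):
        self.unet = lambda x: x * 2     # stand-in for the denoiser
        self.decoder = lambda x: x + 1  # stays unwrapped


def fake_compile(fn):
    """Stand-in for torch.compile: wrap a callable unchanged."""
    def wrapped(x):
        return fn(x)
    wrapped.compiled = True  # marker so we can see the swap happened
    return wrapped


pipe = FakePipeline()
# Only the hot submodule is replaced; the decoder is untouched.
pipe.unet = fake_compile(pipe.unet)

print(pipe.decoder(pipe.unet(3)))  # 3 * 2 + 1 = 7
```

Because attribute assignment rebinds only `pipe.unet`, every later call through the pipeline transparently uses the wrapped submodule, which is exactly why the one-line swap in the script above is sufficient.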
