enable custom vae for trt

tianleiwu · tianleiwu · commit 0cb402a8c858 · 2023-11-13T09:13:48.000Z
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/README.md b/onnxruntime/python/tools/transformers/models/stable_diffusion/README.md
@@ -46,6 +46,7 @@ docker run --rm -it --gpus all -v $PWD:/workspace nvcr.io/nvidia/pytorch:23.10-p
 
 Optionally, you can update TensorRT from 8.6.1 to latest pre-release.
 ```
+python3 -m pip install --upgrade pip
 python3 -m pip install --pre --upgrade --extra-index-url https://pypi.nvidia.com tensorrt
 ```
 
@@ -60,7 +61,7 @@ sh build.sh --config Release  --build_shared_lib --parallel --use_cuda --cuda_ve
             --cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=OFF \
             --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=80 \
             --allow_running_as_root
-python3 -m pip install build/Linux/Release/dist/onnxruntime_gpu-1.17.0-cp310-cp310-linux_x86_64.whl
+python3 -m pip install build/Linux/Release/dist/onnxruntime_gpu-1.17.0-cp310-cp310-linux_x86_64.whl --force-reinstall
 ```
 
 If the GPU is not A100, change `CMAKE_CUDA_ARCHITECTURES=80` in the command line according to the GPU compute capacity.
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/diffusion_models.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/diffusion_models.py
@@ -90,13 +90,15 @@ def __init__(
         use_vae=False,
         min_image_size=256,
         max_image_size=1024,
+        use_fp16_vae=True,
     ):
         self.version = version
         self._is_inpaint = is_inpaint
         self._is_refiner = is_refiner
         self._use_vae = use_vae
         self._min_image_size = min_image_size
         self._max_image_size = max_image_size
+        self._use_fp16_vae = use_fp16_vae
         if is_refiner:
             assert self.is_xl()
 
@@ -127,6 +129,13 @@ def stages(self) -> List[str]:
     def vae_scaling_factor(self) -> float:
         return 0.13025 if self.is_xl() else 0.18215
 
+    def vae_torch_fallback(self) -> bool:
+        return self.is_xl() and not self._use_fp16_vae
+
+    def custom_fp16_vae(self) -> Optional[str]:
+        # For SD XL, use a VAE that is fine-tuned to run in fp16 precision without generating NaNs
+        return "madebyollin/sdxl-vae-fp16-fix" if self._use_fp16_vae and self.is_xl() else None
+
     @staticmethod
     def supported_versions(is_xl: bool):
         return ["xl-1.0"] if is_xl else ["1.4", "1.5", "2.0-base", "2.0", "2.1", "2.1-base"]
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/engine_builder.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/engine_builder.py
@@ -60,15 +60,8 @@ def __init__(
         self.torch_device = torch.device(device, torch.cuda.current_device())
         self.stages = pipeline_info.stages()
 
-        # TODO: use custom fp16 for ORT_TRT, and no need to fallback to torch.
-        self.vae_torch_fallback = self.pipeline_info.is_xl() and engine_type != EngineType.ORT_CUDA
-
-        # For SD XL, use an VAE that modified to run in fp16 precision without generating NaNs.
-        self.custom_fp16_vae = (
-            "madebyollin/sdxl-vae-fp16-fix"
-            if self.pipeline_info.is_xl() and self.engine_type == EngineType.ORT_CUDA
-            else None
-        )
+        self.vae_torch_fallback = self.pipeline_info.vae_torch_fallback()
+        self.custom_fp16_vae = self.pipeline_info.custom_fp16_vae()
 
         self.models = {}
         self.engines = {}
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/pipeline_stable_diffusion.py b/onnxruntime/python/tools/transformers/models/stable_diffusion/pipeline_stable_diffusion.py
@@ -104,8 +104,6 @@ def __init__(
 
         self.stages = pipeline_info.stages()
 
-        self.vae_torch_fallback = self.pipeline_info.is_xl()
-
         self.use_cuda_graph = use_cuda_graph
 
         self.tokenizer = None
diff --git a/onnxruntime/python/tools/transformers/models/stable_diffusion/requirements.txt b/onnxruntime/python/tools/transformers/models/stable_diffusion/requirements.txt
@@ -13,3 +13,5 @@ sympy
 optimum==1.13.1
 safetensors
 invisible_watermark
+# newer versions of opencv-python might fail with the error: module 'cv2.dnn' has no attribute 'DictValue'
+opencv-python==4.8.0.74