gradio depth,normal,hough ok (#5873)

zhiboniu · web-flow · commit b7246e1ac8cd · 2023-05-09T14:39:50.000+08:00
diff --git a/ppdiffusers/examples/controlnet/README.md b/ppdiffusers/examples/controlnet/README.md
@@ -39,12 +39,35 @@ python gradio_hed2image.py
 python gradio_pose2image.py
 ```
 ![image](https://user-images.githubusercontent.com/20476674/222131475-4dc8582a-d2a2-447a-9724-85461de04c26.png)
+
 ## Semantic Segmentation to Image
 采用ADE20K分割协议的图片作为控制条件。
 ```
 python gradio_seg2image_segmenter.py
 ```
 ![image](https://user-images.githubusercontent.com/20476674/222131908-b0c52512-ef42-4e4b-8fde-62c12c600ff2.png)
+
+## Depth to Image
+采用深度估计(Depth)得到的深度图作为控制条件。
+```
+python gradio_depth2image.py
+```
+![image](https://user-images.githubusercontent.com/31800336/236171819-29085f22-c99c-4f63-b0a0-7cce6ac98ebc.jpg)
+
+## Normal to Image
+采用法线估计(Normal)得到的法线图作为控制条件。
+```
+python gradio_normal2image.py
+```
+![image](https://user-images.githubusercontent.com/31800336/236171840-f31a4f1c-9997-41c0-83ca-4f87ca4cc870.jpg)
+
+## Hough Line to Image
+采用直线检测(Hough Line)得到的线条图作为控制条件。
+```
+python gradio_hough2image.py
+```
+![image](https://user-images.githubusercontent.com/31800336/236171830-f9254b66-9fbd-46d3-a3bc-e905c87d0ec3.jpg)
+
 # ControlNet模型训练
 
 ## Fill50K 训练例子
diff --git a/ppdiffusers/examples/controlnet/annotator/midas_paddle/api_inference.py b/ppdiffusers/examples/controlnet/annotator/midas_paddle/api_inference.py
@@ -63,6 +63,10 @@ def __init__(self, model_dir, model_name="dpt_hybrid", batchsize=8, device="GPU"
                 use_static=False,
                 use_calib_mode=False,
             )
+            min_input_shape = {"image": [1, 3, 224, 224]}
+            max_input_shape = {"image": [1, 3, 1280, 1280]}
+            opt_input_shape = {"image": [1, 3, 384, 384]}
+            config.set_trt_dynamic_shape_info(min_input_shape, max_input_shape, opt_input_shape)
 
         # disable print log when predict
         config.disable_glog_info()
diff --git a/ppdiffusers/examples/controlnet/annotator/mlsd/models/mbv2_mlsd_large.py b/ppdiffusers/examples/controlnet/annotator/mlsd/models/mbv2_mlsd_large.py
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 import paddle
-import utils
+from annotator.mlsd import utils
 
 
 class BlockTypeA(paddle.nn.Layer):
diff --git a/ppdiffusers/examples/controlnet/gradio_depth2image.py b/ppdiffusers/examples/controlnet/gradio_depth2image.py
@@ -0,0 +1,131 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# Copyright 2023 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import random
+
+import cv2
+import gradio as gr
+import paddle
+from annotator.midas_paddle import MidasDetector_Infer as MidasDetector
+from annotator.util import HWC3, resize_image
+
+from paddlenlp.trainer import set_seed as seed_everything
+from ppdiffusers import ControlNetModel, StableDiffusionControlNetPipeline
+
+# Depth annotator used by process() to turn the uploaded image into a control map.
+apply_midas = MidasDetector()
+
+# Load the depth ControlNet weights and attach them to the Stable Diffusion v1.5
+# pipeline. The safety checker is explicitly disabled (safety_checker=None).
+controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-depth")
+pipe = StableDiffusionControlNetPipeline.from_pretrained(
+    "runwayml/stable-diffusion-v1-5", controlnet=controlnet, safety_checker=None
+)
+
+
+def process(
+    input_image,
+    prompt,
+    a_prompt,
+    n_prompt,
+    num_samples,
+    image_resolution,
+    detect_resolution,
+    ddim_steps,
+    guess_mode,
+    strength,
+    scale,
+    seed,
+    eta,
+):
+    """Run depth-conditioned image generation for the Gradio demo.
+
+    Args:
+        input_image: Uploaded image as a numpy array (the UI requests ``type="numpy"``).
+        prompt: Main text prompt.
+        a_prompt: Additional prompt text, appended to ``prompt`` after ``", "``.
+        n_prompt: Negative prompt forwarded to the pipeline.
+        num_samples: Number of images to generate (one pipeline call per image).
+        image_resolution: Resolution given to ``resize_image`` to derive the output height/width.
+        detect_resolution: Resolution given to ``resize_image`` before depth detection.
+        ddim_steps: Forwarded to the pipeline as ``num_inference_steps``.
+        guess_mode: When truthy, the 13 control scales decay as ``strength * 0.825 ** (12 - i)``;
+            otherwise all 13 scales equal ``strength``.
+        strength: Base ControlNet conditioning scale.
+        scale: Forwarded to the pipeline as ``guidance_scale``.
+        seed: Random seed; ``-1`` draws a random seed in ``[0, 65535]``.
+        eta: Forwarded to the pipeline as ``eta``.
+
+    Returns:
+        A list whose first element is the detected map (resized to the output size),
+        followed by the generated images.
+
+    Raises:
+        ValueError: If no input image was provided.
+    """
+    # Clicking "Run" without uploading anything delivers None; fail with a clear message
+    # instead of an obscure error from inside the annotator helpers.
+    if input_image is None:
+        raise ValueError("An input image is required: please upload an image before running.")
+
+    # NOTE(review): slider values may arrive as floats depending on the Gradio version;
+    # range() and the seeding helper need integers, so normalise them here.
+    num_samples = int(num_samples)
+    ddim_steps = int(ddim_steps)
+    seed = int(seed)
+
+    with paddle.no_grad():
+        input_image = HWC3(input_image)
+        # The detector returns two values; only the first one is used as the control map.
+        detected_map, _ = apply_midas(resize_image(input_image, detect_resolution))
+        detected_map = HWC3(detected_map)
+        resized_input = resize_image(input_image, image_resolution)
+        H, W = resized_input.shape[:2]
+        detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_NEAREST)
+
+        # HWC uint8-range map -> NCHW float tensor scaled to [0, 1].
+        control = paddle.to_tensor(detected_map.copy(), dtype=paddle.float32) / 255.0
+        control = control.unsqueeze(0).transpose([0, 3, 1, 2])
+
+        # Magic number inherited from the reference implementation:
+        # 0.825**12 < 0.01 but 0.826**12 > 0.01.
+        if guess_mode:
+            control_scales = [strength * (0.825 ** float(12 - i)) for i in range(13)]
+        else:
+            control_scales = [strength] * 13
+
+        if seed == -1:
+            seed = random.randint(0, 65535)
+        seed_everything(seed)
+
+        full_prompt = prompt + ", " + a_prompt
+        results = []
+        for _ in range(num_samples):
+            generated = pipe(
+                full_prompt,
+                negative_prompt=n_prompt,
+                image=control,
+                num_inference_steps=ddim_steps,
+                height=H,
+                width=W,
+                eta=eta,
+                controlnet_conditioning_scale=control_scales,
+                guidance_scale=scale,
+            ).images[0]
+            results.append(generated)
+
+    return [detected_map] + results
+
+
+# Build the Gradio UI. Components are laid out in creation order, so the order of the
+# statements below is significant.
+block = gr.Blocks().queue()
+with block:
+    with gr.Row():
+        gr.Markdown("## Control Stable Diffusion with Depth Maps")
+    with gr.Row():
+        with gr.Column():
+            input_image = gr.Image(source="upload", type="numpy")
+            prompt = gr.Textbox(label="Prompt")
+            # The button text is set through `value`; `label` does not control it.
+            run_button = gr.Button(value="Run")
+            with gr.Accordion("Advanced options", open=False):
+                num_samples = gr.Slider(label="Images", minimum=1, maximum=12, value=1, step=1)
+                image_resolution = gr.Slider(label="Image Resolution", minimum=256, maximum=768, value=512, step=64)
+                strength = gr.Slider(label="Control Strength", minimum=0.0, maximum=2.0, value=1.0, step=0.01)
+                guess_mode = gr.Checkbox(label="Guess Mode", value=False)
+                detect_resolution = gr.Slider(label="Depth Resolution", minimum=128, maximum=1024, value=384, step=1)
+                ddim_steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=20, step=1)
+                scale = gr.Slider(label="Guidance Scale", minimum=0.1, maximum=30.0, value=9.0, step=0.1)
+                # A seed of -1 makes process() pick a random seed.
+                seed = gr.Slider(label="Seed", minimum=-1, maximum=2147483647, step=1, randomize=True)
+                eta = gr.Number(label="eta (DDIM)", value=0.0)
+                a_prompt = gr.Textbox(label="Added Prompt", value="best quality, extremely detailed")
+                n_prompt = gr.Textbox(
+                    label="Negative Prompt",
+                    value="longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality",
+                )
+        with gr.Column():
+            result_gallery = gr.Gallery(label="Output", show_label=False, elem_id="gallery").style(
+                grid=2, height="auto"
+            )
+    # Input order must match the positional parameters of process().
+    ips = [
+        input_image,
+        prompt,
+        a_prompt,
+        n_prompt,
+        num_samples,
+        image_resolution,
+        detect_resolution,
+        ddim_steps,
+        guess_mode,
+        strength,
+        scale,
+        seed,
+        eta,
+    ]
+    run_button.click(fn=process, inputs=ips, outputs=[result_gallery])
+
+
+# Listens on all network interfaces (0.0.0.0), so the demo is reachable from other hosts.
+block.launch(server_name="0.0.0.0")
diff --git a/ppdiffusers/examples/controlnet/gradio_hough2image.py b/ppdiffusers/examples/controlnet/gradio_hough2image.py
@@ -0,0 +1,143 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+# Copyright 2023 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import random
+
+import cv2
+import gradio as gr
+import paddle
+from annotator.mlsd import MLSDdetector
+from annotator.util import HWC3, resize_image
+
+from paddlenlp.trainer import set_seed as seed_everything
+from ppdiffusers import ControlNetModel, StableDiffusionControlNetPipeline
+
+# Line-segment annotator used by process() to turn the uploaded image into a control map.
+apply_mlsd = MLSDdetector()
+
+# Load the MLSD ControlNet weights and attach them to the Stable Diffusion v1.5
+# pipeline. The safety checker is explicitly disabled (safety_checker=None).
+controlnet = ControlNetModel.from_pretrained("lllyasviel/sd-controlnet-mlsd")
+pipe = StableDiffusionControlNetPipeline.from_pretrained(
+    "runwayml/stable-diffusion-v1-5", controlnet=controlnet, safety_checker=None
+)
+
+
+def process(
+    input_image,
+    prompt,
+    a_prompt,
+    n_prompt,
+    num_samples,
+    image_resolution,
+    detect_resolution,
+    ddim_steps,
+    guess_mode,
+    strength,
+    scale,
+    seed,
+    eta,
+    value_threshold,
+    distance_threshold,
+):
+    """Run line-map-conditioned image generation for the Gradio demo.
+
+    Args:
+        input_image: Uploaded image as a numpy array (the UI requests ``type="numpy"``).
+        prompt: Main text prompt.
+        a_prompt: Additional prompt text, appended to ``prompt`` after ``", "``.
+        n_prompt: Negative prompt forwarded to the pipeline.
+        num_samples: Number of images to generate (one pipeline call per image).
+        image_resolution: Resolution given to ``resize_image`` to derive the output height/width.
+        detect_resolution: Resolution given to ``resize_image`` before line detection.
+        ddim_steps: Forwarded to the pipeline as ``num_inference_steps``.
+        guess_mode: When truthy, the 13 control scales decay as ``strength * 0.825 ** (12 - i)``;
+            otherwise all 13 scales equal ``strength``.
+        strength: Base ControlNet conditioning scale.
+        scale: Forwarded to the pipeline as ``guidance_scale``.
+        seed: Random seed; ``-1`` draws a random seed in ``[0, 65535]``.
+        eta: Forwarded to the pipeline as ``eta``.
+        value_threshold: Forwarded to the MLSD detector as its second argument.
+        distance_threshold: Forwarded to the MLSD detector as its third argument.
+
+    Returns:
+        A list whose first element is the detected map (resized to the output size),
+        followed by the generated images.
+
+    Raises:
+        ValueError: If no input image was provided.
+    """
+    # Clicking "Run" without uploading anything delivers None; fail with a clear message
+    # instead of an obscure error from inside the annotator helpers.
+    if input_image is None:
+        raise ValueError("An input image is required: please upload an image before running.")
+
+    # NOTE(review): slider values may arrive as floats depending on the Gradio version;
+    # range() and the seeding helper need integers, so normalise them here.
+    num_samples = int(num_samples)
+    ddim_steps = int(ddim_steps)
+    seed = int(seed)
+
+    with paddle.no_grad():
+        input_image = HWC3(input_image)
+        detected_map = apply_mlsd(resize_image(input_image, detect_resolution), value_threshold, distance_threshold)
+        detected_map = HWC3(detected_map)
+        resized_input = resize_image(input_image, image_resolution)
+        H, W = resized_input.shape[:2]
+        detected_map = cv2.resize(detected_map, (W, H), interpolation=cv2.INTER_NEAREST)
+
+        # HWC uint8-range map -> NCHW float tensor scaled to [0, 1].
+        control = paddle.to_tensor(detected_map.copy(), dtype=paddle.float32) / 255.0
+        control = control.unsqueeze(0).transpose([0, 3, 1, 2])
+
+        # Magic number inherited from the reference implementation:
+        # 0.825**12 < 0.01 but 0.826**12 > 0.01.
+        if guess_mode:
+            control_scales = [strength * (0.825 ** float(12 - i)) for i in range(13)]
+        else:
+            control_scales = [strength] * 13
+
+        if seed == -1:
+            seed = random.randint(0, 65535)
+        seed_everything(seed)
+
+        full_prompt = prompt + ", " + a_prompt
+        results = []
+        for _ in range(num_samples):
+            generated = pipe(
+                full_prompt,
+                negative_prompt=n_prompt,
+                image=control,
+                num_inference_steps=ddim_steps,
+                height=H,
+                width=W,
+                eta=eta,
+                controlnet_conditioning_scale=control_scales,
+                guidance_scale=scale,
+            ).images[0]
+            results.append(generated)
+
+    return [detected_map] + results
+
+
+# Build the Gradio UI. Components are laid out in creation order, so the order of the
+# statements below is significant.
+block = gr.Blocks().queue()
+with block:
+    with gr.Row():
+        gr.Markdown("## Control Stable Diffusion with Hough Line Maps")
+    with gr.Row():
+        with gr.Column():
+            input_image = gr.Image(source="upload", type="numpy")
+            prompt = gr.Textbox(label="Prompt")
+            # The button text is set through `value`; `label` does not control it.
+            run_button = gr.Button(value="Run")
+            with gr.Accordion("Advanced options", open=False):
+                num_samples = gr.Slider(label="Images", minimum=1, maximum=12, value=1, step=1)
+                image_resolution = gr.Slider(label="Image Resolution", minimum=256, maximum=768, value=512, step=64)
+                strength = gr.Slider(label="Control Strength", minimum=0.0, maximum=2.0, value=1.0, step=0.01)
+                guess_mode = gr.Checkbox(label="Guess Mode", value=False)
+                detect_resolution = gr.Slider(
+                    label="Hough Line Resolution", minimum=128, maximum=1024, value=512, step=1
+                )
+                # The two thresholds below are passed straight to the MLSD detector.
+                value_threshold = gr.Slider(
+                    label="Hough value threshold (MLSD)", minimum=0.01, maximum=2.0, value=0.1, step=0.01
+                )
+                distance_threshold = gr.Slider(
+                    label="Hough distance threshold (MLSD)", minimum=0.01, maximum=20.0, value=0.1, step=0.01
+                )
+                ddim_steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=20, step=1)
+                scale = gr.Slider(label="Guidance Scale", minimum=0.1, maximum=30.0, value=9.0, step=0.1)
+                # A seed of -1 makes process() pick a random seed.
+                seed = gr.Slider(label="Seed", minimum=-1, maximum=2147483647, step=1, randomize=True)
+                eta = gr.Number(label="eta (DDIM)", value=0.0)
+                a_prompt = gr.Textbox(label="Added Prompt", value="best quality, extremely detailed")
+                n_prompt = gr.Textbox(
+                    label="Negative Prompt",
+                    value="longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality",
+                )
+        with gr.Column():
+            result_gallery = gr.Gallery(label="Output", show_label=False, elem_id="gallery").style(
+                grid=2, height="auto"
+            )
+    # Input order must match the positional parameters of process().
+    ips = [
+        input_image,
+        prompt,
+        a_prompt,
+        n_prompt,
+        num_samples,
+        image_resolution,
+        detect_resolution,
+        ddim_steps,
+        guess_mode,
+        strength,
+        scale,
+        seed,
+        eta,
+        value_threshold,
+        distance_threshold,
+    ]
+    run_button.click(fn=process, inputs=ips, outputs=[result_gallery])
+
+
+# Listens on all network interfaces (0.0.0.0), so the demo is reachable from other hosts.
+block.launch(server_name="0.0.0.0")
diff --git a/ppdiffusers/examples/controlnet/gradio_normal2image.py b/ppdiffusers/examples/controlnet/gradio_normal2image.py