PaddlePaddle · LokeZhou · Dec 27, 2023 · Dec 15, 2023 · Dec 18, 2023 · Dec 19, 2023
diff --git a/ppdiffusers/ppdiffusers/models/dual_transformer_2d.py b/ppdiffusers/ppdiffusers/models/dual_transformer_2d.py
@@ -101,6 +101,7 @@ def forward(
         encoder_hidden_states,
         timestep=None,
         attention_mask=None,
+        encoder_attention_mask=None,
         cross_attention_kwargs=None,
         return_dict: bool = True,
     ):

diff --git a/tests/appflow/test_DualTextAndImageGuidedGeneration.py b/tests/appflow/test_DualTextAndImageGuidedGeneration.py
@@ -0,0 +1,61 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import unittest
+import paddle
+import numpy as np
+
+sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "../.."))
+from paddlemix.appflow import Appflow
+from ppdiffusers.utils import load_image, load_numpy
+
+
+class DualTextGuidedImageGeneration(unittest.TestCase):
+    """Appflow check for the 'dual_text_and_image_guided_generation' task."""
+
+    @classmethod
+    def setUpClass(cls):
+        cls.url = "https://paddlenlp.bj.bcebos.com/models/community/CompVis/data/benz.jpg"
+        cls.expected_image = 'https://bj.bcebos.com/v1/paddlenlp/models/community/paddlemix/appflow/test/test_DualTextAndImageGuidedGeneration/dual_text_and_image_guided_generation.png'
+
+    def test_image_generation(self):
+        """Run the pipeline with a fixed seed and compare against the reference image."""
+        source_image = load_image(self.url)
+        paddle.seed(1024)
+        pipeline = Appflow(
+            app='dual_text_and_image_guided_generation',
+            models=['shi-labs/versatile-diffusion'],
+        )
+        image = pipeline(prompt="a red car in the sun", image=source_image)['result']
+        self.assertIsNotNone(image)
+
+        # Bring both images to a common size so they can be compared pixel by pixel.
+        target_size = (512, 512)
+        expected_image = load_image(self.expected_image)
+        actual_pixels = list(image.resize(target_size).getdata())
+        expected_pixels = list(expected_image.resize(target_size).getdata())
+
+        # Mean over pixels of the summed absolute difference of the pixel values.
+        total_diff = 0.0
+        for actual, expected in zip(actual_pixels, expected_pixels):
+            total_diff += sum(abs(a - b) for a, b in zip(actual, expected))
+        mean_diff = total_diff / len(actual_pixels)
+
+        self.assertLessEqual(mean_diff, 5)
+
+if __name__ == "__main__":
+    # Allow running this test module directly as a script.
+    unittest.main()
diff --git a/tests/appflow/test_Image2ImageTextGuidedGeneration.py b/tests/appflow/test_Image2ImageTextGuidedGeneration.py
@@ -0,0 +1,67 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import unittest
+import paddle
+import numpy as np
+
+sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "../.."))
+from paddlemix.appflow import Appflow
+from ppdiffusers.utils import load_image, load_numpy
+
+
+class Image2ImageTextGuidedGeneration(unittest.TestCase):
+    """Regression test for the 'image2image_text_guided_generation' Appflow task."""
+
+    @classmethod
+    def setUpClass(cls):
+        cls.url = "https://paddlenlp.bj.bcebos.com/models/community/CompVis/data/image_Kurisu.png"
+        cls.expected_image = 'https://bj.bcebos.com/v1/paddlenlp/models/community/paddlemix/appflow/test/test_Image2ImageTextGuidedGeneration/image2image_text_guided_generation.png'
+
+    def test_image_generation(self):
+        """Generate with a fixed seed and compare the result to the stored reference.
+
+        The test passes when the mean, over all pixels, of the summed absolute
+        difference of the pixel values of the two images is at most 5.
+        """
+        image = load_image(self.url).resize((512, 768))
+
+        paddle.seed(42)
+        prompt = "Kurisu Makise, looking at viewer, long hair, standing, 1girl, hair ornament, hair flower, cute, jacket, white flower, white dress"
+        negative_prompt = "lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry"
+
+        app = Appflow(app='image2image_text_guided_generation', models=['Linaqruf/anything-v3.0'])
+        image = app(prompt=prompt, negative_prompt=negative_prompt, image=image)['result']
+
+        self.assertIsNotNone(image)
+        # Compare the generated image with the expected reference image.
+        expect_img = load_image(self.expected_image)
+
+        # Resize both images to a common size so pixels line up one-to-one.
+        size = (512, 512)
+        data1 = list(image.resize(size).getdata())
+        data2 = list(expect_img.resize(size).getdata())
+
+        # Sum the absolute differences of every pixel pair, then average per pixel.
+        diff_sum = sum(
+            abs(c - d) for pixel1, pixel2 in zip(data1, data2) for c, d in zip(pixel1, pixel2)
+        )
+        average_diff = diff_sum / len(data1)
+        self.assertLessEqual(average_diff, 5)
+
+if __name__ == "__main__":
+    # Allow running this test module directly as a script.
+    unittest.main()
diff --git a/tests/appflow/test_MusicGeneration.py b/tests/appflow/test_MusicGeneration.py
@@ -0,0 +1,59 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import unittest
+
+
+sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "../.."))
+from paddlemix.appflow import Appflow
+from ppdiffusers.utils import load_image, load_numpy
+import paddle
+
+class MusicGenerationTest(unittest.TestCase):
+    """Smoke tests for the 'music_generation' Appflow task."""
+
+    @classmethod
+    def setUpClass(cls):
+        # test_image2music loads its input image from self.url, so it must be set here.
+        # NOTE(review): any reachable sample image should do -- confirm the intended one.
+        cls.url = "https://paddlenlp.bj.bcebos.com/models/community/CompVis/data/benz.jpg"
+
+    def test_text2music(self):
+        """Generate audio from a text prompt with a fixed generator seed."""
+        task = Appflow(app="music_generation", models=["cvssp/audioldm"])
+        prompt = "A classic cocktail lounge vibe with smooth jazz piano and a cool, relaxed atmosphere."
+        negative_prompt = 'low quality, average quality, muffled quality, noise interference, poor and low-grade quality, inaudible quality, low-fidelity quality'
+        result = task(
+            prompt=prompt,
+            negative_prompt=negative_prompt,
+            num_inference_steps=20,
+            audio_length_in_s=5,
+            generator=paddle.Generator().manual_seed(120),
+        )['result']
+        self.assertIsNotNone(result)
+
+    def test_image2music(self):
+        """Describe an image with MiniGPT4; only the text description step is exercised."""
+        task1 = Appflow(app="music_generation", models=["miniGPT4/MiniGPT4-7B"])
+        image_pil = load_image(self.url)
+        result = task1(image=image_pil, minigpt4_text='describe the image, ')['result'].split('#')[0]
+        paddle.device.cuda.empty_cache()
+        self.assertIsNotNone(result)
+
+
+if __name__ == "__main__":
+    # Allow running this test module directly as a script.
+    unittest.main()
diff --git a/tests/appflow/test_TextGuidedImageInpainting.py b/tests/appflow/test_TextGuidedImageInpainting.py
@@ -0,0 +1,66 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import unittest
+
+import numpy as np
+
+sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "../.."))
+from paddlemix.appflow import Appflow
+from ppdiffusers.utils import load_image, load_numpy
+import paddle
+
+class TextGuidedImageInpaintingTest(unittest.TestCase):
+    """Appflow check for the 'inpainting' task against a stored reference image."""
+
+    @classmethod
+    def setUpClass(cls):
+        cls.img_url = "https://paddlenlp.bj.bcebos.com/models/community/CompVis/stable-diffusion-v1-4/overture-creations.png"
+        cls.mask_url = "https://paddlenlp.bj.bcebos.com/models/community/CompVis/stable-diffusion-v1-4/overture-creations-mask.png"
+        cls.expected_image = 'https://bj.bcebos.com/v1/paddlenlp/models/community/paddlemix/appflow/test/test_TextGuidedImageInpainting/inpainting.png'
+
+    def test_image_inpainting(self):
+        """Inpaint the masked region with a fixed seed and compare with the reference."""
+        source_image = load_image(self.img_url)
+        mask = load_image(self.mask_url)
+        paddle.seed(1024)
+
+        pipeline = Appflow(
+            app='inpainting',
+            models=['stabilityai/stable-diffusion-2-inpainting'],
+        )
+        image = pipeline(
+            inpaint_prompt="Face of a yellow cat, high resolution, sitting on a park bench",
+            image=source_image,
+            seg_masks=mask,
+        )['result']
+        self.assertIsNotNone(image)
+
+        # Bring both images to a common size so they can be compared pixel by pixel.
+        target_size = (512, 512)
+        expected_image = load_image(self.expected_image)
+        actual_pixels = list(image.resize(target_size).getdata())
+        expected_pixels = list(expected_image.resize(target_size).getdata())
+
+        # Mean over pixels of the summed absolute difference of the pixel values.
+        total_diff = 0.0
+        for actual, expected in zip(actual_pixels, expected_pixels):
+            total_diff += sum(abs(a - b) for a, b in zip(actual, expected))
+        self.assertLessEqual(total_diff / len(actual_pixels), 5)
+
+if __name__ == "__main__":
+    # Allow running this test module directly as a script.
+    unittest.main()
diff --git a/tests/appflow/test_TextGuidedImageUpscaling.py b/tests/appflow/test_TextGuidedImageUpscaling.py
@@ -0,0 +1,66 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import unittest
+
+import numpy as np
+import paddle
+
+sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "../.."))
+from paddlemix.appflow import Appflow
+from ppdiffusers.utils import load_image, load_numpy
+
+
+class TextGuidedImageUpscalingTest(unittest.TestCase):
+    """Appflow check for the 'image2image_text_guided_upscaling' task."""
+
+    @classmethod
+    def setUpClass(cls):
+        cls.url = "https://paddlenlp.bj.bcebos.com/models/community/CompVis/data/low_res_cat.png"
+        cls.expected_image = 'https://bj.bcebos.com/v1/paddlenlp/models/community/paddlemix/appflow/test/test_TextGuidedImageUpscaling/upscaled_white_cat.png'
+
+    def test_image_upscaling(self):
+        """Upscale a 128x128 input with a fixed seed and compare with the reference."""
+        low_res_img = load_image(self.url).resize((128, 128))
+        paddle.seed(1024)
+
+        upscaler = Appflow(
+            app='image2image_text_guided_upscaling',
+            models=['stabilityai/stable-diffusion-x4-upscaler'],
+        )
+        image = upscaler(
+            prompt="a white cat",
+            image=low_res_img,
+        )['result']
+        self.assertIsNotNone(image)
+
+        # Bring both images to a common size so they can be compared pixel by pixel.
+        target_size = (512, 512)
+        expected_image = load_image(self.expected_image)
+        actual_pixels = list(image.resize(target_size).getdata())
+        expected_pixels = list(expected_image.resize(target_size).getdata())
+
+        # Mean over pixels of the summed absolute difference of the pixel values.
+        pixel_diffs = [
+            sum(abs(a - b) for a, b in zip(actual, expected))
+            for actual, expected in zip(actual_pixels, expected_pixels)
+        ]
+        mean_diff = sum(pixel_diffs, 0.0) / len(actual_pixels)
+        self.assertLessEqual(mean_diff, 5)
+
+if __name__ == "__main__":
+    # Allow running this test module directly as a script.
+    unittest.main()
diff --git a/tests/appflow/test_audio-to-Caption.py b/tests/appflow/test_audio-to-Caption.py
@@ -0,0 +1,59 @@
+# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import sys
+import unittest
+import tempfile
+import requests
+
+sys.path.insert(0, os.path.join(os.path.dirname(os.path.abspath(__file__)), "../.."))
+from paddlemix.appflow import Appflow
+import paddle
+from tests.testing_utils import _run_slow_test
+
+
+class AudioToCaptionTest(unittest.TestCase):
+    """Appflow 'audio2caption' tests; the test methods are attached by create_test."""
+
+
+def create_test(name, static_mode):
+    """Attach an audio-to-caption test method named ``name`` to AudioToCaptionTest."""
+    # NOTE(review): static_mode is accepted but never applied -- confirm the intent.
+
+    def test_audio2caption(self):
+        paddle.seed(1024)
+        task = Appflow(app="audio2caption", models=["whisper", "THUDM/chatglm-6b"])
+        audio_file_url = "https://bj.bcebos.com/v1/paddlenlp/models/community/paddlemix/appflow/test/test_audio/zh.wav"
+        prompt = "描述这段话：{}."
+        with tempfile.NamedTemporaryFile() as audio_file:
+            audio_file.write(requests.get(audio_file_url).content)
+            # The task is given only the file name, so flush buffered bytes to disk first.
+            audio_file.flush()
+            result = task(audio=audio_file.name, prompt=prompt)['prompt']
+
+        self.assertIsNotNone(result)
+        self.assertIn('健康', result)
+
+    setattr(AudioToCaptionTest, name, test_audio2caption)
+
+
+# Register at import time so test discovery sees the tests, not only script runs.
+create_test(name="test_dygraph", static_mode=False)
+if _run_slow_test:
+    create_test(name="test_static", static_mode=True)
+
+
+if __name__ == "__main__":
+    unittest.main()