|
23 | 23 | import numpy as np |
24 | 24 | import requests |
25 | 25 | from packaging.version import Version |
26 | | -from PIL import Image, ImageDraw |
| 26 | +from PIL import Image, ImageDraw, ImageOps |
27 | 27 |
|
28 | 28 | from .image_utils import np2base64 |
29 | 29 | from .log import logger |
@@ -187,7 +187,8 @@ def read_image(self, image): |
187 | 187 | """ |
188 | 188 | image_buff = self._get_buffer(image) |
189 | 189 |
|
190 | | - _image = np.array(Image.open(BytesIO(image_buff)).convert("RGB")) |
| 190 | + # Use exif_transpose to correct orientation |
| 191 | + _image = np.array(ImageOps.exif_transpose(Image.open(BytesIO(image_buff)).convert("RGB"))) |
191 | 192 | return _image |
192 | 193 |
|
193 | 194 | @classmethod |
@@ -216,16 +217,18 @@ def read_pdf(self, pdf, password=None): |
216 | 217 | logger.warning("Currently only parse the first page for PDF input with more than one page.") |
217 | 218 |
|
218 | 219 | page = pdf_doc.load_page(0) |
219 | | - image = np.array(self.get_page_image(page).convert("RGB")) |
| 220 | + # The page image is shrunk when converted from PDF by fitz, so we scale it up by a factor of 10 in each dimension |
| 221 | + matrix = fitz.Matrix(10, 10) |
| 222 | + image = np.array(self.get_page_image(page, matrix).convert("RGB")) |
220 | 223 | return image |
221 | 224 |
|
222 | 225 | @classmethod |
223 | | - def get_page_image(self, page): |
| 226 | + def get_page_image(self, page, matrix): |
224 | 227 | """ |
225 | 228 | get page image |
226 | 229 | """ |
227 | | - pix = page.get_pixmap() |
228 | | - image_buff = pix.pil_tobytes("jpeg", optimize=True) |
| 230 | + pix = page.get_pixmap(matrix=matrix) |
| 231 | + image_buff = pix.pil_tobytes("jpeg") |
229 | 232 | return Image.open(BytesIO(image_buff)) |
230 | 233 |
|
231 | 234 | def init_ocr_inference(self): |
|
0 commit comments