Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 3 additions & 5 deletions pdftotree/utils/pdf/pdf_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from functools import cmp_to_key
from typing import Any, Dict, List, Tuple

import numpy as np
from pdfminer.layout import LTFigure, LTTextLine
from pdfminer.utils import Plane

Expand Down Expand Up @@ -1274,11 +1275,8 @@ def get_page_width(boxes):


def get_char_width(boxes: List[LTTextLine]) -> float:
box_len_sum = 0
num_char_sum = 0
for i, b in enumerate(boxes):
box_len_sum = box_len_sum + b.bbox[2] - b.bbox[0]
num_char_sum = num_char_sum + len(b.get_text())
box_len_sum = np.sum([b.bbox[2] - b.bbox[0] for b in boxes])
num_char_sum = np.sum([len(b.get_text()) for b in boxes])
try:
return box_len_sum / num_char_sum
except ZeroDivisionError:
Expand Down