Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ Fixed
work even when the input mention is not tabular.
(`#425 <https://github.com/HazyResearch/fonduer/issues/425>`_)
(`#426 <https://github.com/HazyResearch/fonduer/pull/426>`_)
* `@HiromuHota`_: Fix the order of arguments passed to ``Bbox`` (page, top, bottom, left, right).
(`#443 <https://github.com/HazyResearch/fonduer/issues/443>`_)
(`#444 <https://github.com/HazyResearch/fonduer/pull/444>`_)

0.8.2_ - 2020-04-28
-------------------
Expand Down
4 changes: 2 additions & 2 deletions src/fonduer/parser/visual_linker.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ def _coordinates_from_HTML(
word_id = (page_num, i)
pdf_word_list.append((word_id, content))
coordinate_map[word_id] = Bbox(
page_num, y_min_line, xmin, y_max_line, xmax,
page_num, y_min_line, y_max_line, xmin, xmax,
)
block_coordinates[word_id] = (y_min_block, x_min_block)
i += 1
Expand Down Expand Up @@ -318,7 +318,7 @@ def display_match_counts() -> int:

def _update_coordinates(self) -> Iterator[Sentence]:
for sentence in self.sentences:
(page, top, left, bottom, right) = list(
(page, top, bottom, left, right) = list(
zip(
*[
self.coordinate_map[self.links[((sentence.stable_id), i)]]
Expand Down
10 changes: 5 additions & 5 deletions src/fonduer/utils/visualizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,13 @@ def display_boxes(
boxes_by_page: DefaultDict[
int, List[Tuple[int, int, int, int]]
] = defaultdict(list)
for i, (page, top, left, bottom, right) in enumerate(boxes):
for i, (page, top, bottom, left, right) in enumerate(boxes):
boxes_per_page[page] += 1
boxes_by_page[page].append((top, left, bottom, right))
boxes_by_page[page].append((top, bottom, left, right))
for i, page_num in enumerate(boxes_per_page.keys()):
img = pdf_to_img(pdf_file, page_num)
draw.fill_color = transparent
for j, (top, left, bottom, right) in enumerate(boxes_by_page[page_num]):
for j, (top, bottom, left, right) in enumerate(boxes_by_page[page_num]):
draw.stroke_color = colors[j % 2] if alternate_colors else colors[0]
draw.rectangle(left=left, top=top, right=right, bottom=bottom)
draw(img)
Expand Down Expand Up @@ -103,8 +103,8 @@ def display_words(
Bbox(
sentence.page[i],
sentence.top[i],
sentence.left[i],
sentence.bottom[i],
sentence.left[i],
sentence.right[i],
)
)
Expand All @@ -117,8 +117,8 @@ def get_box(span: SpanMention) -> Bbox:
return Bbox(
min(span.get_attrib_tokens("page")),
min(span.get_attrib_tokens("top")),
min(span.get_attrib_tokens("left")),
max(span.get_attrib_tokens("bottom")),
min(span.get_attrib_tokens("left")),
max(span.get_attrib_tokens("right")),
)

Expand Down
17 changes: 13 additions & 4 deletions tests/utils/test_visualizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

def test_visualizer():
"""Unit test of visualizer using the md document."""
from fonduer.utils.visualizer import Visualizer # noqa
from fonduer.utils.visualizer import Visualizer, get_box # noqa

docs_path = "tests/data/html_simple/md.html"
pdf_path = "tests/data/pdf_simple/md.pdf"
Expand Down Expand Up @@ -38,12 +38,21 @@ def test_visualizer():

doc = candidate_extractor_udf.apply(doc, split=0)

cands = doc.organizations
# Take one candidate
cand = doc.organizations[0]

# Test visualizer
pdf_path = "tests/data/pdf_simple"
vis = Visualizer(pdf_path)
vis.display_candidates([cands[0]])

# Test bounding boxes
boxes = [get_box(mention.context) for mention in cand.get_mentions()]
for box in boxes:
assert box.top <= box.bottom
assert box.left <= box.right
assert boxes == [mention.context.get_bbox() for mention in cand.get_mentions()]

# Test visualizer
vis.display_candidates([cand])


def test_get_pdf_dim():
Expand Down