Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
9194b20
Separate the open/open correction into two versions which strive to a…
AngledLuffa Apr 26, 2024
fa54b1e
Separate out another advance-past-constituent function which will be …
AngledLuffa Apr 26, 2024
cca6fab
Add an in-order-compound oracle that handles the unambiguous transitions
AngledLuffa Apr 26, 2024
d08d488
Add a script which splits a dataset (such as id_icon) into 90/10 pieces
AngledLuffa Apr 27, 2024
c77a9b3
Add an option in which checking the constituent labels only optionall…
AngledLuffa Apr 29, 2024
2ca4c47
Add a script to randomly shuffle (with replacement) a constituency da…
AngledLuffa May 1, 2024
db4e2f2
Oops, need to actually report the error
AngledLuffa May 1, 2024
41f9e89
Add a short name for the transition schemes to make save names using …
AngledLuffa May 1, 2024
93afaab
Make an error when building an ensemble a bit more explained
AngledLuffa May 2, 2024
6407615
Upgrade the random tree selection to enforce that there are examples …
AngledLuffa May 3, 2024
3aaae34
Allow for reporting that an oracle repair actually cannot be repaired…
AngledLuffa May 3, 2024
c7add96
Move the ambiguous in_order_compound correction to the end of the cor…
AngledLuffa May 3, 2024
3fa3898
Add a flag to not remove training duplicates for conparse - useful wh…
AngledLuffa May 5, 2024
ae97619
Add the half-baked script I've been using to grep through constituenc…
AngledLuffa May 6, 2024
ec8fd43
Also, add a script to grep through the test results for parsers
AngledLuffa May 6, 2024
8c3f122
List oracle repairs on separate lines for readability
AngledLuffa May 6, 2024
e6343d0
Log repairs missed as well as repairs made for InOrder
AngledLuffa May 6, 2024
a57a2a3
Add a few optional closes for the ambiguous in-order shift/open errors
AngledLuffa May 7, 2024
17c8d4d
When running the dynamic oracle, do all transitions marked 'debug' so…
AngledLuffa May 8, 2024
8d79592
Update the close_shift_nested to cover more cases (note: if this happ…
AngledLuffa May 8, 2024
7ae2057
Solve certain cases of close-open-shift/shift in the in-order oracle
AngledLuffa May 9, 2024
51a40ce
Although this in-order transition repair should be unambiguous, it se…
AngledLuffa May 9, 2024
7ab173e
Use a refactored version of a test in the in-order oracle unittest
AngledLuffa May 9, 2024
5aabb40
Add another wider test to the close_shift_shift test and refactor the…
AngledLuffa May 9, 2024
b6a6f1e
Add versions of the shift_shift to cover the ambiguous cases, either …
AngledLuffa May 9, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 50 additions & 1 deletion stanza/models/constituency/dynamic_oracle.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,48 @@ def advance_past_constituents(gold_sequence, cur_index):
cur_index = cur_index + 1
return None

def find_previous_open(gold_sequence, cur_index):
    """
    Scan backwards from just before cur_index for the enclosing open.

    Walking right to left, each CloseConstituent lowers a running
    depth by one and each OpenConstituent raises it by one.  The
    first OpenConstituent which brings the depth above zero is an
    open not matched by a close seen during the scan, and its index
    is returned.  The transition at cur_index itself is not examined.

    Returns None if the start of the sequence is reached without
    finding such an open.
    """
    depth = 0
    for position in range(cur_index - 1, -1, -1):
        transition = gold_sequence[position]
        if isinstance(transition, OpenConstituent):
            depth += 1
            # a positive depth means this open was not balanced by
            # any close seen between here and cur_index
            if depth > 0:
                return position
        elif isinstance(transition, CloseConstituent):
            depth -= 1
    return None

def find_in_order_constituent_end(gold_sequence, cur_index):
    """
    Scan forwards from cur_index until the current block has ended.

    A running depth goes up on each OpenConstituent and down on each
    CloseConstituent.  The scan stops, returning the index of the
    transition it stopped on, at whichever comes first:

      - a CloseConstituent which takes the depth to -1, meaning it
        closes something opened before cur_index
      - a Shift seen at depth 0 after at least one earlier Shift was
        already seen in this scan

    The second condition is how this differs from
    advance_past_constituents: it lets us return the first block of
    things we know attach to the left.

    Returns None if the end of the sequence is reached first.
    """
    depth = 0
    shift_seen = False
    for position in range(cur_index, len(gold_sequence)):
        transition = gold_sequence[position]
        if isinstance(transition, OpenConstituent):
            depth += 1
        elif isinstance(transition, CloseConstituent):
            depth -= 1
            if depth == -1:
                return position
        elif isinstance(transition, Shift):
            # the first Shift only gets recorded; a later Shift at
            # depth 0 marks the start of the next block
            if shift_seen and depth == 0:
                return position
            shift_seen = True
    return None

class DynamicOracle():
def __init__(self, root_labels, oracle_level, repair_types, additional_levels, deactivated_levels):
self.root_labels = root_labels
Expand Down Expand Up @@ -46,11 +88,18 @@ def fix_error(self, gold_transition, pred_transition, gold_sequence, gold_index)
for repair_type in self.repair_types:
if repair_type.fn is None:
continue
if self.oracle_level is not None and repair_type.value > self.oracle_level and repair_type not in self.additional_levels:
if self.oracle_level is not None and repair_type.value > self.oracle_level and repair_type not in self.additional_levels and not repair_type.debug:
continue
if repair_type in self.deactivated_levels:
continue
repair = repair_type.fn(gold_transition, pred_transition, gold_sequence, gold_index, self.root_labels)
if repair is None:
continue

if isinstance(repair, tuple) and len(repair) == 2:
return repair

# TODO: could update all of the returns to be tuples of length 2
if repair is not None:
return repair_type, repair

Expand Down
2 changes: 1 addition & 1 deletion stanza/models/constituency/ensemble.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ def __init__(self, args, filenames=None, models=None, foundation_cache=None):
if self.models[0].transition_scheme() != model.transition_scheme():
raise ValueError("Models {} and {} are incompatible. {} vs {}".format(filenames[0], filenames[model_idx], self.models[0].transition_scheme(), model.transition_scheme()))
if self.models[0].transitions != model.transitions:
raise ValueError("Models %s and %s are incompatible: different transitions" % (filenames[0], filenames[model_idx]))
raise ValueError(f"Models {filenames[0]} and {filenames[model_idx]} are incompatible: different transitions\n{filenames[0]}:\n{self.models[0].transitions}\n{filenames[model_idx]}:\n{model.transitions}")
if self.models[0].constituents != model.constituents:
raise ValueError("Models %s and %s are incompatible: different constituents" % (filenames[0], filenames[model_idx]))
if self.models[0].root_labels != model.root_labels:
Expand Down
Loading