Skip to content

Commit 1280aa4

Browse files
dsblank, cdhorn, and stevenyoungs
authored and committed
Refactor, fix, and optimize filters/rules
This change does four things:

1. Unrolls the recursive call of `filter.apply()`, and splits out single checks into `filter.apply_to_one()`.
2. Uses data attributes (`person.gender`) rather than accessor functions (`person.get_gender()`) where possible.
3. Adds an optimizer based on `rule.selected_handles` sets.
4. Adds type hints to make sure the right objects are passed into methods.

Final comparison of finding everyone related to the home person in a 40k-person Family Tree, between Gramps 5.2 and this change (Gramps 6.0), in seconds (smaller is better):

| Version    | Prepare Time | Apply Time | Total Time |
|------------|-------------:|-----------:|-----------:|
| Gramps 5.2 |          4.5 |       27.7 |       32.2 |
| Gramps 6.0 |          8.0 |        0.5 |        8.5 |

The above uses the optimizer. Here is a test finding all people with a tag (5 people match):

| Version    | Prepare Time | Apply Time | Total Time |
|------------|-------------:|-----------:|-----------:|
| Gramps 5.2 |          0.0 |        5.0 |        5.0 |
| Gramps 6.0 |          0.0 |        1.6 |        1.6 |

Recall that converting from JSON to objects is a little slower than converting from array BLOBs to objects, so this is a large improvement.

Co-authored-by: Christopher Horn <[email protected]>
Co-authored-by: stevenyoungs <[email protected]>
1 parent ca6c3b2 commit 1280aa4

File tree

152 files changed

+2579
-987
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

152 files changed

+2579
-987
lines changed

gramps/gen/db/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -71,3 +71,5 @@
7171
from .undoredo import *
7272
from .utils import *
7373
from .generic import *
74+
75+
Database = DbGeneric

gramps/gen/db/generic.py

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44
# Copyright (C) 2015-2016 Gramps Development Team
55
# Copyright (C) 2016 Nick Hall
66
# Copyright (C) 2024 Doug Blank
7+
# Copyright (C) 2024,2025 Steve Youngs <[email protected]>
78
#
89
# This program is free software; you can redistribute it and/or modify
910
# it under the terms of the GNU General Public License as published by
@@ -2782,3 +2783,6 @@ def set_serializer(self, serializer_name):
27822783
self.serializer = BlobSerializer
27832784
elif serializer_name == "json":
27842785
self.serializer = JSONSerializer
2786+
2787+
2788+
Database = DbGeneric

gramps/gen/filters/_genericfilter.py

Lines changed: 153 additions & 81 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,9 @@
2424
Package providing filtering framework for Gramps.
2525
"""
2626

27+
import logging
28+
import time
29+
2730
# ------------------------------------------------------------------------
2831
#
2932
# Gramps imports
@@ -40,8 +43,11 @@
4043
from ..lib.note import Note
4144
from ..lib.tag import Tag
4245
from ..const import GRAMPS_LOCALE as glocale
46+
from .rules import Rule
47+
from .optimizer import Optimizer
4348

4449
_ = glocale.translation.gettext
50+
LOG = logging.getLogger(".filter.results")
4551

4652

4753
# -------------------------------------------------------------------------
@@ -52,7 +58,7 @@
5258
class GenericFilter:
5359
"""Filter class that consists of several rules."""
5460

55-
logical_functions = ["or", "and", "xor", "one"]
61+
logical_functions = ["and", "or", "one"]
5662

5763
def __init__(self, source=None):
5864
if source:
@@ -74,10 +80,8 @@ def match(self, handle, db):
7480
"""
7581
Return True or False depending on whether the handle matches the filter.
7682
"""
77-
if self.apply(db, [handle]):
78-
return True
79-
else:
80-
return False
83+
obj = self.get_object(handle)
84+
return self.apply_to_one(db, obj)
8185

8286
def is_empty(self):
8387
return (len(self.flist) == 0) or (
@@ -88,10 +92,7 @@ def set_logical_op(self, val):
8892
if val in GenericFilter.logical_functions:
8993
self.logical_op = val
9094
else:
91-
self.logical_op = "and"
92-
93-
def get_logical_op(self):
94-
return self.logical_op
95+
raise Exception("invalid operator: %r" % val)
9596

9697
def set_invert(self, val):
9798
self.invert = bool(val)
@@ -138,99 +139,116 @@ def find_from_handle(self, db, handle):
138139
def get_number(self, db):
139140
return db.get_number_of_people()
140141

141-
def check_func(self, db, id_list, task, user=None, tupleind=None, tree=False):
142+
def apply_logical_op_to_all(
143+
self, db, id_list, apply_logical_op, user=None, tupleind=None, tree=False
144+
):
142145
final_list = []
143-
if user:
144-
user.begin_progress(_("Filter"), _("Applying ..."), self.get_number(db))
146+
147+
optimizer = Optimizer(self)
148+
handles_in, handles_out = optimizer.get_handles()
149+
150+
LOG.debug(
151+
"Optimizer handles_in: %s",
152+
len(handles_in) if handles_in is not None else None,
153+
)
154+
LOG.debug("Optimizer handles_out: %s", len(handles_out))
145155
if id_list is None:
146-
with self.get_tree_cursor(db) if tree else self.get_cursor(db) as cursor:
147-
for handle, data in cursor:
148-
person = db.serializer.data_to_object(data)
156+
if handles_in is not None:
157+
if user:
158+
user.begin_progress(_("Filter"), _("Applying ..."), len(handles_in))
159+
160+
# Use these rather than going through entire database
161+
for handle in handles_in:
149162
if user:
150163
user.step_progress()
151-
if task(db, person) != self.invert:
152-
final_list.append(handle)
164+
165+
if handle is None:
166+
continue
167+
168+
obj = self.get_object(db, handle)
169+
170+
if apply_logical_op(db, obj, self.flist) != self.invert:
171+
final_list.append(obj.handle)
172+
173+
else:
174+
with (
175+
self.get_tree_cursor(db) if tree else self.get_cursor(db)
176+
) as cursor:
177+
if user:
178+
user.begin_progress(
179+
_("Filter"), _("Applying ..."), self.get_number(db)
180+
)
181+
182+
for handle, obj in cursor:
183+
if user:
184+
user.step_progress()
185+
186+
if handle in handles_out:
187+
continue
188+
189+
if apply_logical_op(db, obj, self.flist) != self.invert:
190+
final_list.append(handle)
191+
153192
else:
154-
for data in id_list:
155-
if tupleind is None:
156-
handle = data
157-
else:
158-
handle = data[tupleind]
159-
person = self.find_from_handle(db, handle)
193+
if user:
194+
id_list = list(id_list)
195+
user.begin_progress(_("Filter"), _("Applying ..."), len(id_list))
196+
for handle_data in id_list:
160197
if user:
161198
user.step_progress()
162-
if task(db, person) != self.invert:
163-
final_list.append(data)
164-
if user:
165-
user.end_progress()
166-
return final_list
167199

168-
def check_and(self, db, id_list, user=None, tupleind=None, tree=False):
169-
final_list = []
170-
flist = self.flist
171-
if user:
172-
user.begin_progress(_("Filter"), _("Applying ..."), self.get_number(db))
173-
if id_list is None:
174-
with self.get_tree_cursor(db) if tree else self.get_cursor(db) as cursor:
175-
for handle, data in cursor:
176-
person = db.serializer.data_to_object(data)
177-
if user:
178-
user.step_progress()
179-
val = all(rule.apply(db, person) for rule in flist)
180-
if val != self.invert:
181-
final_list.append(handle)
182-
else:
183-
for data in id_list:
184200
if tupleind is None:
185-
handle = data
201+
handle = handle_data
186202
else:
187-
handle = data[tupleind]
188-
person = self.find_from_handle(db, handle)
189-
if user:
190-
user.step_progress()
191-
val = all(rule.apply(db, person) for rule in flist if person)
192-
if val != self.invert:
193-
final_list.append(data)
194-
if user:
195-
user.end_progress()
196-
return final_list
203+
handle = handle_data[tupleind]
197204

198-
def check_or(self, db, id_list, user=None, tupleind=None, tree=False):
199-
return self.check_func(db, id_list, self.or_test, user, tupleind, tree=False)
205+
if handles_in is not None:
206+
if handle not in handles_in:
207+
continue
208+
elif handle in handles_out:
209+
continue
200210

201-
def check_one(self, db, id_list, user=None, tupleind=None, tree=False):
202-
return self.check_func(db, id_list, self.one_test, user, tupleind, tree=False)
211+
obj = self.get_object(db, handle)
203212

204-
def check_xor(self, db, id_list, user=None, tupleind=None, tree=False):
205-
return self.check_func(db, id_list, self.xor_test, user, tupleind, tree=False)
213+
if apply_logical_op(db, obj, self.flist) != self.invert:
214+
final_list.append(handle_data)
206215

207-
def xor_test(self, db, person):
208-
test = False
209-
for rule in self.flist:
210-
test = test ^ rule.apply(db, person)
211-
return test
216+
if user:
217+
user.end_progress()
218+
219+
return final_list
212220

213-
def one_test(self, db, person):
221+
def and_test(self, db, data: dict, flist):
222+
return all(rule.apply_to_one(db, data) for rule in flist)
223+
224+
def one_test(self, db, data: dict, flist):
214225
found_one = False
215-
for rule in self.flist:
216-
if rule.apply(db, person):
226+
for rule in flist:
227+
if rule.apply_to_one(db, data):
217228
if found_one:
218229
return False # There can be only one!
219230
found_one = True
220231
return found_one
221232

222-
def or_test(self, db, person):
223-
return any(rule.apply(db, person) for rule in self.flist)
233+
def or_test(self, db, data: dict, flist):
234+
return any(rule.apply_to_one(db, data) for rule in flist)
224235

225-
def get_check_func(self):
226-
try:
227-
m = getattr(self, "check_" + self.logical_op)
228-
except AttributeError:
229-
m = self.check_and
230-
return m
236+
def get_logical_op(self):
237+
return self.logical_op
231238

232-
def check(self, db, handle):
233-
return self.get_check_func()(db, [handle])
239+
def apply_to_one(self, db, data: dict) -> bool:
240+
"""
241+
Filter-level apply rules to single data item.
242+
"""
243+
if self.logical_op == "and":
244+
res = self.and_test(db, data, self.flist)
245+
elif self.logical_op == "or":
246+
res = self.or_test(db, data, self.flist)
247+
elif self.logical_op == "one":
248+
res = self.one_test(db, data, self.flist)
249+
else:
250+
raise Exception("invalid operator: %r" % self.logical_op)
251+
return res != self.invert
234252

235253
def apply(self, db, id_list=None, tupleind=None, user=None, tree=False):
236254
"""
@@ -249,14 +267,44 @@ def apply(self, db, id_list=None, tupleind=None, user=None, tree=False):
249267
if id_list not given, all items in the database that
250268
match the filter are returned as a list of handles
251269
"""
252-
m = self.get_check_func()
270+
if user:
271+
user.begin_progress(_("Filter"), _("Preparing ..."), len(self.flist) + 1)
272+
# FIXME: this dialog doesn't show often. Adding a time.sleep(0.1) here
273+
# can help on my machine
274+
275+
start_time = time.time()
253276
for rule in self.flist:
277+
if user:
278+
user.step_progress()
254279
rule.requestprepare(db, user)
255-
res = m(db, id_list, user, tupleind, tree)
280+
LOG.debug("Prepare time: %s seconds", time.time() - start_time)
281+
282+
if user:
283+
user.end_progress()
284+
285+
if self.logical_op == "and":
286+
apply_logical_op = self.and_test
287+
elif self.logical_op == "or":
288+
apply_logical_op = self.or_test
289+
elif self.logical_op == "one":
290+
apply_logical_op = self.one_test
291+
else:
292+
raise Exception("invalid operator: %r" % self.logical_op)
293+
294+
start_time = time.time()
295+
res = self.apply_logical_op_to_all(
296+
db, id_list, apply_logical_op, user, tupleind, tree
297+
)
298+
LOG.debug("Apply time: %s seconds", time.time() - start_time)
299+
256300
for rule in self.flist:
257301
rule.requestreset()
302+
258303
return res
259304

305+
def get_object(self, db, handle):
306+
return db.get_person_from_handle(handle)
307+
260308

261309
class GenericFamilyFilter(GenericFilter):
262310
def __init__(self, source=None):
@@ -274,6 +322,9 @@ def find_from_handle(self, db, handle):
274322
def get_number(self, db):
275323
return db.get_number_of_families()
276324

325+
def get_object(self, db, handle):
326+
return db.get_family_from_handle(handle)
327+
277328

278329
class GenericEventFilter(GenericFilter):
279330
def __init__(self, source=None):
@@ -291,6 +342,9 @@ def find_from_handle(self, db, handle):
291342
def get_number(self, db):
292343
return db.get_number_of_events()
293344

345+
def get_object(self, db, handle):
346+
return db.get_event_from_handle(handle)
347+
294348

295349
class GenericSourceFilter(GenericFilter):
296350
def __init__(self, source=None):
@@ -308,6 +362,9 @@ def find_from_handle(self, db, handle):
308362
def get_number(self, db):
309363
return db.get_number_of_sources()
310364

365+
def get_object(self, db, handle):
366+
return db.get_source_from_handle(handle)
367+
311368

312369
class GenericCitationFilter(GenericFilter):
313370
def __init__(self, source=None):
@@ -328,6 +385,9 @@ def find_from_handle(self, db, handle):
328385
def get_number(self, db):
329386
return db.get_number_of_citations()
330387

388+
def get_object(self, db, handle):
389+
return db.get_citation_from_handle(handle)
390+
331391

332392
class GenericPlaceFilter(GenericFilter):
333393
def __init__(self, source=None):
@@ -348,6 +408,9 @@ def find_from_handle(self, db, handle):
348408
def get_number(self, db):
349409
return db.get_number_of_places()
350410

411+
def get_object(self, db, handle):
412+
return db.get_place_from_handle(handle)
413+
351414

352415
class GenericMediaFilter(GenericFilter):
353416
def __init__(self, source=None):
@@ -365,6 +428,9 @@ def find_from_handle(self, db, handle):
365428
def get_number(self, db):
366429
return db.get_number_of_media()
367430

431+
def get_object(self, db, handle):
432+
return db.get_media_from_handle(handle)
433+
368434

369435
class GenericRepoFilter(GenericFilter):
370436
def __init__(self, source=None):
@@ -382,6 +448,9 @@ def find_from_handle(self, db, handle):
382448
def get_number(self, db):
383449
return db.get_number_of_repositories()
384450

451+
def get_object(self, db, handle):
452+
return db.get_repository_from_handle(handle)
453+
385454

386455
class GenericNoteFilter(GenericFilter):
387456
def __init__(self, source=None):
@@ -399,6 +468,9 @@ def find_from_handle(self, db, handle):
399468
def get_number(self, db):
400469
return db.get_number_of_notes()
401470

471+
def get_object(self, db, handle):
472+
return db.get_note_from_handle(handle)
473+
402474

403475
def GenericFilterFactory(namespace):
404476
if namespace == "Person":

0 commit comments

Comments
 (0)