DBAPI improvements with SQLite implementation #2101
Changes from all commits
a36c053
0f08c40
fcf70dc
3f611af
@@ -480,6 +480,35 @@ def get_person_handles(self, sort_handles=False, locale=glocale):
         self.dbapi.execute("SELECT handle FROM person")
         return [row[0] for row in self.dbapi.fetchall()]
 
+    def get_person_handles_cursor(self, sort_handles=False, locale=glocale):
+        """
+        Return a cursor that iterates over person handles without loading
+        them all into memory at once.
+
+        :param sort_handles: If True, the cursor is sorted by surnames.
+        :type sort_handles: bool
+        :param locale: The locale to use for collation.
+        :type locale: A GrampsLocale object.
+        :returns: An iterator over person handles, backed by a database cursor where supported
+        :rtype: iterator
+        """
+        if hasattr(self.dbapi, "cursor"):
+            # Use a real database cursor for backends that support it
+            cursor = self.dbapi.cursor()
+            if sort_handles:
+                cursor.execute(
+                    "SELECT handle FROM person "
+                    "ORDER BY surname, given_name "
+                    f'COLLATE "{self._collation(locale)}"'
+                )
+            else:
+                cursor.execute("SELECT handle FROM person")
+            # Return an iterator that yields handles one at a time
+            return (row[0] for row in cursor)
+        else:
+            # Fall back to a regular list for backends without cursor support
+            return iter(self.get_person_handles(sort_handles, locale))
+
     def get_family_handles(self, sort_handles=False, locale=glocale):
         """
         Return a list of database handles, one handle for each Family in
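A minimal usage sketch for the new cursor-based accessor (not part of the diff; `db` stands for an open DBAPI database instance):

```python
# Iterate person handles lazily instead of building the full list first;
# memory use stays roughly constant even for very large trees.
for handle in db.get_person_handles_cursor(sort_handles=True):
    person = db.get_person_from_handle(handle)
    print(person.gramps_id)
```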
@@ -1268,10 +1297,15 @@ def _create_performance_indexes(self):
     def optimize_database(self):
         """
         Optimize the database for better performance.
+        Backend-specific optimizations should be implemented in subclasses.
         """
-        self.dbapi.execute("ANALYZE;")
-        self.dbapi.execute("VACUUM;")
-        self.dbapi.commit()
+        # ANALYZE is generally supported across databases
+        try:
+            self.dbapi.execute("ANALYZE;")
+            self.dbapi.commit()
+        except Exception:
+            # Some backends may not support ANALYZE
+            pass
 
     def bulk_insert(self, table_name, data_list, batch_size=1000):
         """
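Since the base method now only runs ANALYZE, backend-specific maintenance is left to subclasses. A sketch of what a SQLite-specific override might look like (hypothetical: the class name, module path, and whether this PR actually adds such an override are assumptions, not taken from the diff):

```python
# Hypothetical subclass hook, not part of this diff.
from gramps.plugins.db.dbapi.dbapi import DBAPI  # module path assumed


class SQLite(DBAPI):
    def optimize_database(self):
        # Run the backend-agnostic ANALYZE pass first.
        super().optimize_database()
        # VACUUM is SQLite-specific: it rebuilds the database file
        # to reclaim space after large deletions.
        self.dbapi.execute("VACUUM;")
        self.dbapi.commit()
```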
@@ -1699,3 +1733,247 @@ def bulk_get_families(self, handles):
             results.append(self.serializer.data_to_object(family_data, Family))
 
         return results
+
+    def _update_person_auxiliary_data(self, person, old_person=None, trans=None):
+        """
+        Update auxiliary data structures for a person object.
+
+        This method handles all the secondary updates needed when a person
+        is committed, including gender statistics, surname lists, and
+        custom type registries.
+
+        :param person: The Person object being committed
+        :type person: Person
+        :param old_person: The previous version of the person (for updates)
+        :type old_person: Person or None
+        :param trans: The transaction object
+        :type trans: DbTxn or None
+        """
+        # Update gender statistics
+        if old_person:
+            # Update gender statistics if necessary
+            if (old_person.gender != person.gender or
+                    old_person.primary_name.first_name != person.primary_name.first_name):
+                self.genderStats.uncount_person(old_person)
+                self.genderStats.count_person(person)
+
+            # Update surname list if necessary
+            if self._order_by_person_key(person) != self._order_by_person_key(old_person):
+                self.remove_from_surname_list(old_person)
+                self.add_to_surname_list(person, trans.batch if trans else False)
+        else:
+            # New person - add to auxiliary structures
+            self.genderStats.count_person(person)
+            self.add_to_surname_list(person, trans.batch if trans else False)
+
+        # Type registry updates
+        self.individual_attributes.update(
+            [str(attr.type) for attr in person.attribute_list
+             if attr.type.is_custom() and str(attr.type)]
+        )
+
+        self.event_role_names.update(
+            [str(eref.role) for eref in person.event_ref_list
+             if eref.role.is_custom()]
+        )
+
+        self.name_types.update(
+            [str(name.type) for name in ([person.primary_name] + person.alternate_names)
+             if name.type.is_custom()]
+        )
+
+        all_surn = []
+        all_surn += person.primary_name.get_surname_list()
+        for asurname in person.alternate_names:
+            all_surn += asurname.get_surname_list()
+        self.origin_types.update(
+            [str(surn.origintype) for surn in all_surn
+             if surn.origintype.is_custom()]
+        )
+
+        self.url_types.update(
+            [str(url.type) for url in person.urls
+             if url.type.is_custom()]
+        )
+
+        attr_list = []
+        for mref in person.media_list:
+            attr_list += [str(attr.type) for attr in mref.attribute_list
+                          if attr.type.is_custom() and str(attr.type)]
+        self.media_attributes.update(attr_list)
+
+    def prepare(self, name, query):
+        """
+        Prepare a statement for execution. Backend-agnostic implementation
+        that works with any database driver.
+
+        :param name: Name identifier for the prepared statement
+        :param query: SQL query to prepare
+        :returns: Prepared statement object or query string
+        """
+        if not hasattr(self, "_prepared_statements"):
+            self._prepared_statements = {}
+
+        if name not in self._prepared_statements:
+            if hasattr(self.dbapi, "prepare"):
+                # For PostgreSQL, MySQL, etc. that support real prepared statements
+                self._prepared_statements[name] = self.dbapi.prepare(query)
+            else:
+                # For SQLite and others - just cache the query string
+                self._prepared_statements[name] = query
+
+        return self._prepared_statements[name]
+
+    def execute_prepared(self, name, params=None):
+        """
+        Execute a prepared statement by name.
+
+        :param name: Name of the prepared statement
+        :param params: Parameters for the statement
+        :returns: Cursor with results
+        """
+        if not hasattr(self, "_prepared_statements"):
+            raise ValueError(f"No prepared statement '{name}' found")
+
+        stmt = self._prepared_statements.get(name)
+        if stmt is None:
+            raise ValueError(f"Prepared statement '{name}' not found")
+
+        if hasattr(stmt, "execute"):
+            # Real prepared statement object
+            return stmt.execute(params or [])
+        else:
+            # Cached query string
+            return self.dbapi.execute(stmt, params or [])
+
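A short usage sketch for the prepared-statement helpers (illustrative only; the statement name and query are made up, and `db` is an open DBAPI instance):

```python
# Register the statement once; on SQLite this just caches the SQL string,
# while drivers exposing prepare() get a real prepared statement.
db.prepare("person_by_gramps_id",
           "SELECT json_data FROM person WHERE gramps_id = ?")

# Later calls reuse the cached/prepared statement by name.
db.execute_prepared("person_by_gramps_id", ["I0001"])
row = db.dbapi.fetchone()
```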
+    def get_person_from_handle_lazy(self, handle):
+        """
+        Get a person object with lazy loading of related data.
+
+        :param handle: Person handle
+        :returns: LazyPerson object that loads data on access
+        """
+        # Check if person exists first
+        self.dbapi.execute("SELECT 1 FROM person WHERE handle = ?", [handle])
+        if not self.dbapi.fetchone():
+            return None
+
+        class LazyPerson:
+            """Proxy object that loads person data on first access."""
+
+            def __init__(self, handle, db):
+                self._handle = handle
+                self._db = db
+                self._loaded = False
+                self._person = None
+
+            def _load(self):
+                if not self._loaded:
+                    self._person = self._db.get_person_from_handle(self._handle)
+                    self._loaded = True
+
+            def __getattr__(self, name):
+                self._load()
+                return getattr(self._person, name)
+
+            def __setattr__(self, name, value):
+                if name.startswith("_"):
+                    object.__setattr__(self, name, value)
+                else:
+                    self._load()
+                    setattr(self._person, name, value)
+
+        return LazyPerson(handle, self)
+
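A usage sketch for the lazy accessor (illustrative; `db` is an open DBAPI instance and the handle is made up):

```python
proxy = db.get_person_from_handle_lazy("some-person-handle")
if proxy is not None:
    # Only the existence check has hit the database so far; the first
    # attribute access triggers the real get_person_from_handle() call.
    print(proxy.gramps_id)
    # Later accesses reuse the already-loaded Person object.
    print(proxy.primary_name.first_name)
```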
+    def batch_commit_persons(self, persons, trans):
+        """
+        Commit multiple persons efficiently while maintaining data integrity.
+
+        Uses executemany for database operations and ensures all auxiliary
+        updates are properly applied. Transactions are atomic - either all
+        persons are committed or none are.
+
+        :param persons: List of Person objects to commit
+        :type persons: list[Person]
+        :param trans: Transaction object (required)
+        :type trans: DbTxn
+        :raises ValueError: If trans is None or duplicate handles are detected
+        """
+        if not persons:
+            return
+
+        # Validate transaction
+        if trans is None:
+            raise ValueError("Transaction required for batch operations")
+
+        # Check for duplicate handles
+        seen_handles = set()
+        for person in persons:
+            if person.handle in seen_handles:
+                raise ValueError(f"Duplicate person handle in batch: {person.handle}")
+            seen_handles.add(person.handle)
+
+        # Import Person class for deserialization
+        from gramps.gen.lib import Person
+
+        # Batch fetch existing data for update detection
+        handles = [p.handle for p in persons]
+        old_data_map = {}
+        old_person_map = {}
+
+        if handles:
+            placeholders = ','.join('?' * len(handles))
+            cursor = self.dbapi.execute(
+                f'SELECT handle, json_data FROM person WHERE handle IN ({placeholders})',
+                handles
+            )
+            for row in cursor.fetchall():
+                old_data_map[row[0]] = row[1]
+                # Pre-deserialize old persons for efficiency
+                if row[1]:
+                    old_person_map[row[0]] = self.serializer.string_to_object(row[1], Person)
+
+        # Batch database operations
+        if hasattr(self.dbapi, "executemany"):
+            data = []
+            for person in persons:
+                handle = person.handle
+                json_data = self.serializer.object_to_string(person)
+                # Prepare data for batch insert
+                data.append(
+                    (
+                        handle,
+                        json_data,
+                        person.gramps_id,
+                        person.gender,
+                        person.primary_name.first_name,
+                        person.primary_name.surname,
+                    )
+                )
+
+            # Batch insert/update
+            self.dbapi.executemany(
+                "INSERT OR REPLACE INTO person "
+                "(handle, json_data, gramps_id, gender, given_name, surname) "
+                "VALUES (?, ?, ?, ?, ?, ?)",
+                data,
+            )

Reviewer comment: I've not yet worked out how the same is done if …
+        else:
+            # Fallback to individual commits
+            for person in persons:
+                self._commit_person(person, trans)
+
+            # Individual commits already handle auxiliary updates
+            # so we can return early
+            return

Reviewer comment: What happens if an exception is thrown in the 2nd..Nth call to …
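One way to address the question above while keeping the docstring's all-or-nothing promise would be to undo the partial work if any individual commit fails. This is only a sketch, assuming the dbapi connection wrapper exposes a `rollback()` method (not confirmed by the diff):

```python
def commit_persons_one_by_one(db, persons, trans):
    """Illustrative fallback that stays all-or-nothing."""
    try:
        for person in persons:
            db._commit_person(person, trans)
    except Exception:
        # Assumption: the connection wrapper exposes rollback()
        db.dbapi.rollback()
        raise
```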
+        # Apply auxiliary updates for all persons
+        for person in persons:
+            old_person = old_person_map.get(person.handle)
+            self._update_person_auxiliary_data(person, old_person, trans)
+
+        # Emit signals AFTER all data is consistent
+        for person in persons:
+            if person.handle in old_data_map:
+                self.emit('person-update', ([person.handle],))
+            else:
+                self.emit('person-add', ([person.handle],))

Reviewer comment: Nice!
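A usage sketch for the batch commit (illustrative; assumes an open database `db` and a list of already-modified `Person` objects named `persons`):

```python
from gramps.gen.db import DbTxn

# All persons are written in one batch; auxiliary structures (gender stats,
# surname list, custom type registries) are updated and person-add /
# person-update signals are emitted only after the data is consistent.
with DbTxn("Batch commit of persons", db) as trans:
    db.batch_commit_persons(persons, trans)
```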