284 changes: 281 additions & 3 deletions gramps/plugins/db/dbapi/dbapi.py
@@ -480,6 +480,35 @@ def get_person_handles(self, sort_handles=False, locale=glocale):
self.dbapi.execute("SELECT handle FROM person")
return [row[0] for row in self.dbapi.fetchall()]

def get_person_handles_cursor(self, sort_handles=False, locale=glocale):
"""
Return a cursor that iterates over person handles without loading
them all into memory at once.

:param sort_handles: If True, the cursor is sorted by surnames.
:type sort_handles: bool
:param locale: The locale to use for collation.
:type locale: A GrampsLocale object.
:returns: A cursor over person handles where supported, otherwise an iterator.
:rtype: iterator
"""
if hasattr(self.dbapi, "cursor"):
# Use real database cursor for backends that support it
cursor = self.dbapi.cursor()
if sort_handles:
cursor.execute(
"SELECT handle FROM person "
"ORDER BY surname, given_name "
f'COLLATE "{self._collation(locale)}"'
)
else:
cursor.execute("SELECT handle FROM person")
# Return iterator that yields handles one at a time
return (row[0] for row in cursor)
else:
# Fallback to regular list for backends without cursor support
return iter(self.get_person_handles(sort_handles, locale))
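As a side note for review, a minimal usage sketch of the new cursor API (assuming `db` is an open DBAPI-backed database instance; the variable name is illustrative):

    # Iterate handles lazily; memory use stays flat even for large trees
    for handle in db.get_person_handles_cursor(sort_handles=True):
        person = db.get_person_from_handle(handle)
        print(person.gramps_id)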

def get_family_handles(self, sort_handles=False, locale=glocale):
"""
Return a list of database handles, one handle for each Family in
@@ -1268,10 +1297,15 @@ def _create_performance_indexes(self):
def optimize_database(self):
"""
Optimize the database for better performance.
Backend-specific optimizations should be implemented in subclasses.
"""
self.dbapi.execute("ANALYZE;")
self.dbapi.execute("VACUUM;")
self.dbapi.commit()
# ANALYZE is generally supported across databases
try:
self.dbapi.execute("ANALYZE;")
self.dbapi.commit()
except Exception:
# Some backends may not support ANALYZE
pass

def bulk_insert(self, table_name, data_list, batch_size=1000):
"""
@@ -1699,3 +1733,247 @@ def bulk_get_families(self, handles):
results.append(self.serializer.data_to_object(family_data, Family))

return results

def _update_person_auxiliary_data(self, person, old_person=None, trans=None):
"""
Update auxiliary data structures for a person object.

This method handles all the secondary updates needed when a person
is committed, including gender statistics, surname lists, and
custom type registries.

:param person: The Person object being committed
:type person: Person
:param old_person: The previous version of the person (for updates)
:type old_person: Person or None
:param trans: The transaction object
:type trans: DbTxn or None
"""
# Update gender statistics
if old_person:
# Update gender statistics if necessary
if (old_person.gender != person.gender or
old_person.primary_name.first_name != person.primary_name.first_name):
self.genderStats.uncount_person(old_person)
self.genderStats.count_person(person)

# Update surname list if necessary
if self._order_by_person_key(person) != self._order_by_person_key(old_person):
self.remove_from_surname_list(old_person)
self.add_to_surname_list(person, trans.batch if trans else False)
else:
# New person - add to auxiliary structures
self.genderStats.count_person(person)
self.add_to_surname_list(person, trans.batch if trans else False)

# Type registry updates
self.individual_attributes.update(
[str(attr.type) for attr in person.attribute_list
if attr.type.is_custom() and str(attr.type)]
)

self.event_role_names.update(
[str(eref.role) for eref in person.event_ref_list
if eref.role.is_custom()]
)

self.name_types.update(
[str(name.type) for name in ([person.primary_name] + person.alternate_names)
if name.type.is_custom()]
)

all_surn = []
all_surn += person.primary_name.get_surname_list()
for asurname in person.alternate_names:
all_surn += asurname.get_surname_list()
self.origin_types.update(
[str(surn.origintype) for surn in all_surn
if surn.origintype.is_custom()]
)

self.url_types.update(
[str(url.type) for url in person.urls
if url.type.is_custom()]
)

attr_list = []
for mref in person.media_list:
attr_list += [str(attr.type) for attr in mref.attribute_list
if attr.type.is_custom() and str(attr.type)]
self.media_attributes.update(attr_list)

def prepare(self, name, query):
"""
Prepare a statement for execution. Backend-agnostic implementation
that works with any database driver.

:param name: Name identifier for the prepared statement
:param query: SQL query to prepare
:returns: Prepared statement object or query string
"""
if not hasattr(self, "_prepared_statements"):
self._prepared_statements = {}

if name not in self._prepared_statements:
if hasattr(self.dbapi, "prepare"):
# For PostgreSQL, MySQL, etc. that support real prepared statements
self._prepared_statements[name] = self.dbapi.prepare(query)
else:
# For SQLite and others - just cache the query string
self._prepared_statements[name] = query

return self._prepared_statements[name]

def execute_prepared(self, name, params=None):
"""
Execute a prepared statement by name.

:param name: Name of the prepared statement
:param params: Parameters for the statement
:returns: Cursor with results
"""
if not hasattr(self, "_prepared_statements"):
raise ValueError(f"No prepared statement '{name}' found")

stmt = self._prepared_statements.get(name)
if stmt is None:
raise ValueError(f"Prepared statement '{name}' not found")

if hasattr(stmt, "execute"):
# Real prepared statement object
return stmt.execute(params or [])
else:
# Cached query string
return self.dbapi.execute(stmt, params or [])
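To make the intended calling pattern concrete, a hedged sketch (the statement name and query here are examples for review, not part of this PR):

    # Prepare once, then execute repeatedly with different parameters
    db.prepare("person_by_gramps_id",
               "SELECT handle FROM person WHERE gramps_id = ?")
    db.execute_prepared("person_by_gramps_id", ["I0001"])
    row = db.dbapi.fetchone()  # first matching row, or None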

def get_person_from_handle_lazy(self, handle):
"""
Get a person object with lazy loading of related data.

:param handle: Person handle
:returns: LazyPerson object that loads data on access
"""
# Check if person exists first
self.dbapi.execute("SELECT 1 FROM person WHERE handle = ?", [handle])
if not self.dbapi.fetchone():
return None

class LazyPerson:
"""Proxy object that loads person data on first access."""

def __init__(self, handle, db):
self._handle = handle
self._db = db
self._loaded = False
self._person = None

def _load(self):
if not self._loaded:
self._person = self._db.get_person_from_handle(self._handle)
self._loaded = True

def __getattr__(self, name):
self._load()
return getattr(self._person, name)

def __setattr__(self, name, value):
if name.startswith("_"):
object.__setattr__(self, name, value)
else:
self._load()
setattr(self._person, name, value)

return LazyPerson(handle, self)
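A short sketch of how the lazy proxy is meant to behave (hypothetical caller; Person imported from gramps.gen.lib):

    lazy = db.get_person_from_handle_lazy(handle)
    if lazy is not None:
        # Nothing is deserialized until the first attribute access
        surname = lazy.primary_name.surname  # triggers _load()
        lazy.gender = Person.FEMALE          # loads first, then delegates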

def batch_commit_persons(self, persons, trans):
"""
Commit multiple persons efficiently while maintaining data integrity.

Uses executemany for database operations and ensures all auxiliary
updates are properly applied. Transactions are atomic - either all
persons are committed or none are.

:param persons: List of Person objects to commit
:type persons: list[Person]
:param trans: Transaction object (required)
:type trans: DbTxn
:raises ValueError: If trans is None or duplicate handles are detected
"""
if not persons:
return

# Validate transaction
if trans is None:
raise ValueError("Transaction required for batch operations")

# Check for duplicate handles
seen_handles = set()
for person in persons:
if person.handle in seen_handles:
raise ValueError(f"Duplicate person handle in batch: {person.handle}")
seen_handles.add(person.handle)

# Import Person class for deserialization
from gramps.gen.lib import Person

# Batch fetch existing data for update detection
handles = [p.handle for p in persons]
old_data_map = {}
old_person_map = {}

if handles:
placeholders = ','.join('?' * len(handles))
self.dbapi.execute(
f'SELECT handle, json_data FROM person WHERE handle IN ({placeholders})',
handles
)
for row in self.dbapi.fetchall():
old_data_map[row[0]] = row[1]
# Pre-deserialize old persons for efficiency
if row[1]:
old_person_map[row[0]] = self.serializer.string_to_object(row[1], Person)

# Batch database operations
if hasattr(self.dbapi, "executemany"):
data = []
for person in persons:
handle = person.handle
json_data = self.serializer.object_to_string(person)
# Prepare data for batch insert
data.append(
(
handle,
json_data,
person.gramps_id,
person.gender,
person.primary_name.first_name,
person.primary_name.surname,
)
)

# Batch insert/update
self.dbapi.executemany(
Review comment (Contributor): self.commit_person does some additional work, updating gender stats, surname lists etc. See here.
I've not yet worked out how the same is done if self.dbapi.executemany is called, partly because I've not yet located executemany!

"INSERT OR REPLACE INTO person "
"(handle, json_data, gramps_id, gender, given_name, surname) "
"VALUES (?, ?, ?, ?, ?, ?)",
data,
)
else:
# Fallback to individual commits
for person in persons:
self._commit_person(person, trans)
Review comment (@stevenyoungs, Aug 11, 2025): What happens if an exception is thrown in the 2nd..Nth call to self._commit_person?
Are we guaranteed to be in a transaction such that any earlier calls to _commit_person are guaranteed to be rolled back?
i.e. the trans parameter can never be None

# Individual commits already handle auxiliary updates
# so we can return early
return

# Apply auxiliary updates for all persons
for person in persons:
old_person = old_person_map.get(person.handle)
self._update_person_auxiliary_data(person, old_person, trans)

# Emit signals AFTER all data is consistent
for person in persons:
if person.handle in old_data_map:
self.emit('person-update', ([person.handle],))
else:
self.emit('person-add', ([person.handle],))
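On the atomicity question raised in the review comment above: the method refuses to run without a transaction, so the expected calling pattern wraps the batch in DbTxn, which should roll back earlier commits if a later one raises (an assumption worth verifying, per that comment). A sketch under that assumption:

    from gramps.gen.db import DbTxn

    # All-or-nothing: if any person fails, the surrounding transaction
    # is expected to roll back and no person is persisted
    with DbTxn("Batch import persons", db) as trans:
        db.batch_commit_persons(persons, trans)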
15 changes: 15 additions & 0 deletions gramps/plugins/db/dbapi/sqlite.py
@@ -125,6 +125,21 @@ class SQLite(DBAPI):
SQLite interface.
"""

def optimize_database(self):
"""
SQLite-specific database optimization including VACUUM.
"""
# Call parent class optimization first
super().optimize_database()

# SQLite-specific optimizations
try:
self.dbapi.execute("VACUUM;")
self.dbapi.commit()
except Exception as e:
# VACUUM might fail if there are active connections
self.log.warning(f"Could not VACUUM database: {e}")

def get_summary(self):
"""
Return a dictionary of information about this database backend.