Skip to content
Closed
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
247 changes: 244 additions & 3 deletions gramps/plugins/db/dbapi/dbapi.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,34 @@ def get_person_handles(self, sort_handles=False, locale=glocale):
self.dbapi.execute("SELECT handle FROM person")
return [row[0] for row in self.dbapi.fetchall()]

def get_person_cursor(self, sort_handles=False, locale=glocale):
    """
    Return an iterator over person handles without loading them all
    into memory at once.

    :param sort_handles: If True, the handles are ordered by surname
        and given name using the collation of *locale*.
    :type sort_handles: bool
    :param locale: The locale to use for collation.
    :type locale: A GrampsLocale object.
    :returns: An iterator over person handles; backed by a real
        database cursor where the backend supports one, otherwise by
        the list returned from get_person_handles.
    """
    if not hasattr(self.dbapi, "cursor"):
        # Fallback for backends without cursor support: materialize
        # the full handle list and iterate over it.
        return iter(self.get_person_handles(sort_handles, locale))

    def _iter_handles():
        # Generator wrapper so the database cursor is released even if
        # the caller stops iterating early: closing (or garbage
        # collecting) the generator runs the finally block.
        cursor = self.dbapi.cursor()
        try:
            if sort_handles:
                cursor.execute(
                    "SELECT handle FROM person "
                    "ORDER BY surname, given_name "
                    f'COLLATE "{self._collation(locale)}"'
                )
            else:
                cursor.execute("SELECT handle FROM person")
            for row in cursor:
                yield row[0]
        finally:
            # Not every driver requires an explicit close(), but it is
            # safe and prevents cursor leaks on long-lived sessions.
            close = getattr(cursor, "close", None)
            if close is not None:
                close()

    return _iter_handles()

def get_family_handles(self, sort_handles=False, locale=glocale):
"""
Return a list of database handles, one handle for each Family in
Expand Down Expand Up @@ -1268,10 +1296,15 @@ def _create_performance_indexes(self):
def optimize_database(self):
    """
    Optimize the database for better performance.

    Runs ANALYZE, which refreshes the query planner's statistics and
    is supported by most SQL backends.  Backend-specific optimizations
    (e.g. VACUUM for SQLite) should be implemented in subclasses.
    """
    try:
        self.dbapi.execute("ANALYZE;")
        self.dbapi.commit()
    except Exception:
        # Best effort: some backends may not support ANALYZE.  Catch
        # Exception (not a bare except) so KeyboardInterrupt and
        # SystemExit still propagate.
        pass

def bulk_insert(self, table_name, data_list, batch_size=1000):
"""
Expand Down Expand Up @@ -1699,3 +1732,211 @@ def bulk_get_families(self, handles):
results.append(self.serializer.data_to_object(family_data, Family))

return results

# -------------------------------------------------------------------------
# Enhanced DBAPI Methods - Real Cursors, Lazy Loading, Prepared Statements
# -------------------------------------------------------------------------

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
# -------------------------------------------------------------------------
# Enhanced DBAPI Methods - Real Cursors, Lazy Loading, Prepared Statements
# -------------------------------------------------------------------------

I was not sure that this comment added much and would be tempted to remove it (plus the cursor method is now higher up the file)

def prepare(self, name, query):
    """
    Prepare a statement for execution. Backend-agnostic implementation
    that works with any database driver.

    :param name: Name identifier for the prepared statement
    :param query: SQL query to prepare
    :returns: Prepared statement object or query string
    """
    # Lazily create the per-instance statement cache.
    try:
        cache = self._prepared_statements
    except AttributeError:
        cache = self._prepared_statements = {}

    if name in cache:
        return cache[name]

    if hasattr(self.dbapi, "prepare"):
        # Drivers with real server-side prepared statements
        # (PostgreSQL, MySQL, ...).
        statement = self.dbapi.prepare(query)
    else:
        # SQLite and similar drivers: just remember the query text.
        statement = query
    cache[name] = statement
    return statement

def execute_prepared(self, name, params=None):
    """
    Execute a prepared statement by name.

    :param name: Name of the prepared statement
    :param params: Parameters for the statement
    :returns: Cursor with results
    """
    cache = getattr(self, "_prepared_statements", None)
    if cache is None:
        raise ValueError(f"No prepared statement '{name}' found")

    stmt = cache.get(name)
    if stmt is None:
        raise ValueError(f"Prepared statement '{name}' not found")

    arguments = params if params else []
    if hasattr(stmt, "execute"):
        # A real prepared-statement object from the driver.
        return stmt.execute(arguments)
    # A cached SQL string: run it through the ordinary execute path.
    return self.dbapi.execute(stmt, arguments)

def get_person_from_handle_lazy(self, handle):
    """
    Get a person object with lazy loading of related data.

    The returned proxy defers the (potentially expensive) full
    deserialization done by get_person_from_handle until an attribute
    is first read or written.

    :param handle: Person handle
    :returns: LazyPerson object that loads data on access, or None if
        no person row exists for *handle*
    """
    # Check if person exists first.
    # NOTE(review): the qmark ('?') placeholder assumes an SQLite-style
    # paramstyle — TODO confirm for other backends.
    self.dbapi.execute("SELECT 1 FROM person WHERE handle = ?", [handle])
    if not self.dbapi.fetchone():
        return None

    # NOTE(review): a new LazyPerson class object is created on every
    # call, so isinstance() checks against a shared type will not work
    # across calls.
    class LazyPerson:
        """Proxy object that loads person data on first access."""

        def __init__(self, handle, db):
            # All attributes use a leading underscore so __setattr__
            # stores them on the proxy itself rather than loading.
            self._handle = handle
            self._db = db
            self._loaded = False
            self._person = None

        def _load(self):
            # Fetch the real Person once; subsequent accesses reuse it.
            # NOTE(review): if the person was deleted after the
            # existence check above, _person stays None and attribute
            # access below raises AttributeError — confirm acceptable.
            if not self._loaded:
                self._person = self._db.get_person_from_handle(self._handle)
                self._loaded = True

        def __getattr__(self, name):
            # Only invoked for attributes NOT found on the proxy, so
            # the underscore-prefixed fields above never recurse here.
            self._load()
            return getattr(self._person, name)

        def __setattr__(self, name, value):
            if name.startswith("_"):
                # Proxy-internal state: bypass the lazy machinery.
                object.__setattr__(self, name, value)
            else:
                # Public attribute writes go to the real Person.
                self._load()
                setattr(self._person, name, value)

    return LazyPerson(handle, self)

def batch_commit_persons(self, persons, trans):
    """
    Commit multiple persons efficiently while maintaining data integrity.

    Uses executemany for the database writes and then applies the same
    auxiliary updates as commit_person (gender statistics, surname
    list, custom type registries).  Signals are emitted only after all
    data structures are consistent.

    :param persons: List of Person objects to commit
    :param trans: Transaction object; must not be None so partially
        applied batches can be rolled back on failure
    """
    if not persons:
        return

    # Batch-fetch existing rows so we can distinguish inserts from
    # updates and compare old/new values for the auxiliary structures.
    handles = [person.handle for person in persons]
    old_data_map = {}
    placeholders = ",".join("?" * len(handles))
    cursor = self.dbapi.execute(
        f"SELECT handle, json_data FROM person WHERE handle IN ({placeholders})",
        handles,
    )
    for row in cursor.fetchall():
        old_data_map[row[0]] = row[1]

    if not hasattr(self.dbapi, "executemany"):
        # Fallback: commit individually.  _commit_person performs the
        # auxiliary updates and signal emission itself, so return here
        # to avoid applying them a second time below.
        for person in persons:
            self._commit_person(person, trans)
        return

    # Batch insert/update of the raw rows.
    rows = [
        (
            person.handle,
            self.serializer.object_to_string(person),
            person.gramps_id,
            person.gender,
            person.primary_name.first_name,
            person.primary_name.surname,
        )
        for person in persons
    ]
    self.dbapi.executemany(
        "INSERT OR REPLACE INTO person "
        "(handle, json_data, gramps_id, gender, given_name, surname) "
        "VALUES (?, ?, ?, ?, ?, ?)",
        rows,
    )

    # Apply auxiliary updates (mirrors commit_person for each record).
    from gramps.gen.lib import Person

    for person in persons:
        old_data = old_data_map.get(person.handle)

        if old_data:
            # Deserialize the old person for comparison.
            old_person = self.serializer.string_to_object(old_data, Person)

            # Update gender statistics if necessary.
            if (old_person.gender != person.gender
                    or old_person.primary_name.first_name
                    != person.primary_name.first_name):
                self.genderStats.uncount_person(old_person)
                self.genderStats.count_person(person)

            # Update surname list if the sort key changed.
            if (self._order_by_person_key(person)
                    != self._order_by_person_key(old_person)):
                self.remove_from_surname_list(old_person)
                self.add_to_surname_list(person, trans.batch)
        else:
            # New person - add to auxiliary structures.
            self.genderStats.count_person(person)
            self.add_to_surname_list(person, trans.batch)

        # Custom type registry updates (same as commit_person).
        self.individual_attributes.update(
            [str(attr.type) for attr in person.attribute_list
             if attr.type.is_custom() and str(attr.type)]
        )

        self.event_role_names.update(
            [str(eref.role) for eref in person.event_ref_list
             if eref.role.is_custom()]
        )

        self.name_types.update(
            [str(name.type)
             for name in ([person.primary_name] + person.alternate_names)
             if name.type.is_custom()]
        )

        all_surn = []
        all_surn += person.primary_name.get_surname_list()
        for asurname in person.alternate_names:
            all_surn += asurname.get_surname_list()
        self.origin_types.update(
            [str(surn.origintype) for surn in all_surn
             if surn.origintype.is_custom()]
        )

        self.url_types.update(
            [str(url.type) for url in person.urls
             if url.type.is_custom()]
        )

        attr_list = []
        for mref in person.media_list:
            attr_list += [str(attr.type) for attr in mref.attribute_list
                          if attr.type.is_custom() and str(attr.type)]
        self.media_attributes.update(attr_list)

    # Emit signals only after every record and auxiliary structure is
    # consistent, and distinguish new records from updated ones so GUI
    # listeners see the correct event.
    new_handles = [p.handle for p in persons if p.handle not in old_data_map]
    updated_handles = [p.handle for p in persons if p.handle in old_data_map]
    if new_handles:
        self.emit('person-add', (new_handles,))
    if updated_handles:
        self.emit('person-update', (updated_handles,))
Copy link
Contributor

@stevenyoungs stevenyoungs Aug 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

by emitting within the for loop, a person-add signal is generated whilst the DB is in an inconsistent state; the Person records have been fully updated but we have not yet made all of the corresponding updates to genderstats, surname lists, individual attributes etc.
Is it better to complete all data updates and then have a second loop to emit the signals? That way the data is fully consistent when each signal is emitted.

15 changes: 15 additions & 0 deletions gramps/plugins/db/dbapi/sqlite.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,21 @@ class SQLite(DBAPI):
SQLite interface.
"""

def optimize_database(self):
    """
    SQLite-specific database optimization including VACUUM.

    Calls the generic base-class optimization (ANALYZE) first, then
    runs VACUUM to rebuild the database file and reclaim free pages.
    """
    # Call parent class optimization first.
    super().optimize_database()

    # SQLite-specific optimizations.
    try:
        self.dbapi.execute("VACUUM;")
        self.dbapi.commit()
    except Exception as e:
        # VACUUM might fail if there are active connections or an open
        # transaction; treat it as best effort.  Use lazy %-style args
        # so the message is only formatted when the record is emitted.
        self.log.warning("Could not VACUUM database: %s", e)

def get_summary(self):
"""
Return a dictionary of information about this database backend.
Expand Down