-
Notifications
You must be signed in to change notification settings - Fork 470
DBAPI improvements with SQLite implementation #2101
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 3 commits
a36c053
0f08c40
fcf70dc
3f611af
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -480,6 +480,34 @@ def get_person_handles(self, sort_handles=False, locale=glocale): | |||||||
self.dbapi.execute("SELECT handle FROM person") | ||||||||
return [row[0] for row in self.dbapi.fetchall()] | ||||||||
|
||||||||
def get_person_cursor(self, sort_handles=False, locale=glocale): | ||||||||
""" | ||||||||
Return a cursor that iterates over person handles without loading | ||||||||
all into memory at once. | ||||||||
|
||||||||
:param sort_handles: If True, the cursor is sorted by surnames. | ||||||||
:type sort_handles: bool | ||||||||
:param locale: The locale to use for collation. | ||||||||
:type locale: A GrampsLocale object. | ||||||||
:returns: Iterator over person handles | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
""" | ||||||||
if hasattr(self.dbapi, "cursor"): | ||||||||
# Use real database cursor for backends that support it | ||||||||
cursor = self.dbapi.cursor() | ||||||||
if sort_handles: | ||||||||
cursor.execute( | ||||||||
"SELECT handle FROM person " | ||||||||
"ORDER BY surname, given_name " | ||||||||
f'COLLATE "{self._collation(locale)}"' | ||||||||
) | ||||||||
else: | ||||||||
cursor.execute("SELECT handle FROM person") | ||||||||
# Return iterator that yields handles one at a time | ||||||||
return (row[0] for row in cursor) | ||||||||
else: | ||||||||
# Fallback to regular list for backends without cursor support | ||||||||
return iter(self.get_person_handles(sort_handles, locale)) | ||||||||
|
||||||||
def get_family_handles(self, sort_handles=False, locale=glocale): | ||||||||
""" | ||||||||
Return a list of database handles, one handle for each Family in | ||||||||
|
@@ -1268,10 +1296,15 @@ def _create_performance_indexes(self): | |||||||
def optimize_database(self): | ||||||||
""" | ||||||||
Optimize the database for better performance. | ||||||||
Backend-specific optimizations should be implemented in subclasses. | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nice! |
||||||||
""" | ||||||||
self.dbapi.execute("ANALYZE;") | ||||||||
self.dbapi.execute("VACUUM;") | ||||||||
self.dbapi.commit() | ||||||||
# ANALYZE is generally supported across databases | ||||||||
try: | ||||||||
self.dbapi.execute("ANALYZE;") | ||||||||
self.dbapi.commit() | ||||||||
except: | ||||||||
# Some backends may not support ANALYZE | ||||||||
pass | ||||||||
|
||||||||
def bulk_insert(self, table_name, data_list, batch_size=1000): | ||||||||
""" | ||||||||
|
@@ -1699,3 +1732,211 @@ def bulk_get_families(self, handles): | |||||||
results.append(self.serializer.data_to_object(family_data, Family)) | ||||||||
|
||||||||
return results | ||||||||
|
||||||||
# ------------------------------------------------------------------------- | ||||||||
# Enhanced DBAPI Methods - Real Cursors, Lazy Loading, Prepared Statements | ||||||||
# ------------------------------------------------------------------------- | ||||||||
|
||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
I was not sure that this comment added much and would be tempted to remove it (plus the cursor method is now higher up the file) |
||||||||
def prepare(self, name, query): | ||||||||
""" | ||||||||
Prepare a statement for execution. Backend-agnostic implementation | ||||||||
that works with any database driver. | ||||||||
|
||||||||
:param name: Name identifier for the prepared statement | ||||||||
:param query: SQL query to prepare | ||||||||
:returns: Prepared statement object or query string | ||||||||
""" | ||||||||
if not hasattr(self, "_prepared_statements"): | ||||||||
self._prepared_statements = {} | ||||||||
|
||||||||
if name not in self._prepared_statements: | ||||||||
if hasattr(self.dbapi, "prepare"): | ||||||||
# For PostgreSQL, MySQL, etc. that support real prepared statements | ||||||||
self._prepared_statements[name] = self.dbapi.prepare(query) | ||||||||
else: | ||||||||
# For SQLite and others - just cache the query string | ||||||||
self._prepared_statements[name] = query | ||||||||
|
||||||||
return self._prepared_statements[name] | ||||||||
|
||||||||
def execute_prepared(self, name, params=None): | ||||||||
""" | ||||||||
Execute a prepared statement by name. | ||||||||
|
||||||||
:param name: Name of the prepared statement | ||||||||
:param params: Parameters for the statement | ||||||||
:returns: Cursor with results | ||||||||
""" | ||||||||
if not hasattr(self, "_prepared_statements"): | ||||||||
raise ValueError(f"No prepared statement '{name}' found") | ||||||||
|
||||||||
stmt = self._prepared_statements.get(name) | ||||||||
if stmt is None: | ||||||||
raise ValueError(f"Prepared statement '{name}' not found") | ||||||||
|
||||||||
if hasattr(stmt, "execute"): | ||||||||
# Real prepared statement object | ||||||||
return stmt.execute(params or []) | ||||||||
else: | ||||||||
# Cached query string | ||||||||
return self.dbapi.execute(stmt, params or []) | ||||||||
|
||||||||
def get_person_from_handle_lazy(self, handle): | ||||||||
""" | ||||||||
Get a person object with lazy loading of related data. | ||||||||
|
||||||||
:param handle: Person handle | ||||||||
:returns: LazyPerson object that loads data on access | ||||||||
""" | ||||||||
# Check if person exists first | ||||||||
self.dbapi.execute("SELECT 1 FROM person WHERE handle = ?", [handle]) | ||||||||
if not self.dbapi.fetchone(): | ||||||||
return None | ||||||||
|
||||||||
class LazyPerson: | ||||||||
"""Proxy object that loads person data on first access.""" | ||||||||
|
||||||||
def __init__(self, handle, db): | ||||||||
self._handle = handle | ||||||||
self._db = db | ||||||||
self._loaded = False | ||||||||
self._person = None | ||||||||
|
||||||||
def _load(self): | ||||||||
if not self._loaded: | ||||||||
self._person = self._db.get_person_from_handle(self._handle) | ||||||||
self._loaded = True | ||||||||
|
||||||||
def __getattr__(self, name): | ||||||||
self._load() | ||||||||
return getattr(self._person, name) | ||||||||
|
||||||||
def __setattr__(self, name, value): | ||||||||
if name.startswith("_"): | ||||||||
object.__setattr__(self, name, value) | ||||||||
else: | ||||||||
self._load() | ||||||||
setattr(self._person, name, value) | ||||||||
|
||||||||
return LazyPerson(handle, self) | ||||||||
|
||||||||
def batch_commit_persons(self, persons, trans): | ||||||||
""" | ||||||||
Commit multiple persons efficiently while maintaining data integrity. | ||||||||
|
||||||||
Uses executemany for database operations and ensures all auxiliary | ||||||||
updates are properly applied. Structured to benefit from future | ||||||||
batch optimizations automatically. | ||||||||
|
||||||||
:param persons: List of Person objects to commit | ||||||||
:param trans: Transaction object | ||||||||
""" | ||||||||
if not persons: | ||||||||
return | ||||||||
|
||||||||
# Batch fetch existing data for update detection | ||||||||
handles = [p.handle for p in persons] | ||||||||
old_data_map = {} | ||||||||
|
||||||||
if handles and hasattr(self.dbapi, 'execute'): | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Is it possible for |
||||||||
placeholders = ','.join('?' * len(handles)) | ||||||||
cursor = self.dbapi.execute( | ||||||||
f'SELECT handle, json_data FROM person WHERE handle IN ({placeholders})', | ||||||||
handles | ||||||||
) | ||||||||
for row in cursor.fetchall(): | ||||||||
old_data_map[row[0]] = row[1] | ||||||||
|
||||||||
# Batch database operations | ||||||||
if hasattr(self.dbapi, "executemany"): | ||||||||
data = [] | ||||||||
for person in persons: | ||||||||
handle = person.handle | ||||||||
json_data = self.serializer.object_to_string(person) | ||||||||
# Prepare data for batch insert | ||||||||
data.append( | ||||||||
( | ||||||||
handle, | ||||||||
json_data, | ||||||||
person.gramps_id, | ||||||||
person.gender, | ||||||||
person.primary_name.first_name, | ||||||||
person.primary_name.surname, | ||||||||
) | ||||||||
) | ||||||||
|
||||||||
# Batch insert/update | ||||||||
self.dbapi.executemany( | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
I've not yet worked out how the same is done if |
||||||||
"INSERT OR REPLACE INTO person " | ||||||||
"(handle, json_data, gramps_id, gender, given_name, surname) " | ||||||||
"VALUES (?, ?, ?, ?, ?, ?)", | ||||||||
data, | ||||||||
) | ||||||||
else: | ||||||||
# Fallback to individual commits | ||||||||
for person in persons: | ||||||||
self._commit_person(person, trans) | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What happens if an exception is thrown in the 2nd..Nth call to |
||||||||
|
||||||||
# Apply auxiliary updates (COMPLETING THE IMPLEMENTATION) | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Remove the part in parentheses as in years to come the context will be lost |
||||||||
from gramps.gen.lib import Person | ||||||||
|
||||||||
for person in persons: | ||||||||
old_data = old_data_map.get(person.handle) | ||||||||
|
||||||||
if old_data: | ||||||||
# Deserialize old person for comparison | ||||||||
old_person = self.serializer.string_to_object(old_data, Person) | ||||||||
|
||||||||
# Update gender statistics if necessary | ||||||||
if (old_person.gender != person.gender or | ||||||||
old_person.primary_name.first_name != person.primary_name.first_name): | ||||||||
self.genderStats.uncount_person(old_person) | ||||||||
self.genderStats.count_person(person) | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need to protect against the |
||||||||
|
||||||||
# Update surname list if necessary | ||||||||
if self._order_by_person_key(person) != self._order_by_person_key(old_person): | ||||||||
self.remove_from_surname_list(old_person) | ||||||||
self.add_to_surname_list(person, trans.batch) | ||||||||
else: | ||||||||
# New person - add to auxiliary structures | ||||||||
self.genderStats.count_person(person) | ||||||||
self.add_to_surname_list(person, trans.batch) | ||||||||
|
||||||||
# Type registry updates (same as commit_person) | ||||||||
self.individual_attributes.update( | ||||||||
[str(attr.type) for attr in person.attribute_list | ||||||||
if attr.type.is_custom() and str(attr.type)] | ||||||||
) | ||||||||
|
||||||||
self.event_role_names.update( | ||||||||
[str(eref.role) for eref in person.event_ref_list | ||||||||
if eref.role.is_custom()] | ||||||||
) | ||||||||
|
||||||||
self.name_types.update( | ||||||||
[str(name.type) for name in ([person.primary_name] + person.alternate_names) | ||||||||
if name.type.is_custom()] | ||||||||
) | ||||||||
|
||||||||
all_surn = [] | ||||||||
all_surn += person.primary_name.get_surname_list() | ||||||||
for asurname in person.alternate_names: | ||||||||
all_surn += asurname.get_surname_list() | ||||||||
self.origin_types.update( | ||||||||
[str(surn.origintype) for surn in all_surn | ||||||||
if surn.origintype.is_custom()] | ||||||||
) | ||||||||
|
||||||||
self.url_types.update( | ||||||||
[str(url.type) for url in person.urls | ||||||||
if url.type.is_custom()] | ||||||||
) | ||||||||
|
||||||||
attr_list = [] | ||||||||
for mref in person.media_list: | ||||||||
attr_list += [str(attr.type) for attr in mref.attribute_list | ||||||||
if attr.type.is_custom() and str(attr.type)] | ||||||||
self.media_attributes.update(attr_list) | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'd be tempted to move this in to a private method which is shared by the current There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There is so much refactoring that need to be done here. Every time I look at it I find more problems. There's SQLITE SQL in DBGeneric. I have a lot more comprehensice ideas about the entire storage layer. Fixing this stuff bit by bit is almost messier than just biting the bullet and properly abstracting at the right levels. |
||||||||
|
||||||||
# Emit signal for GUI updates | ||||||||
self.emit('person-add', ([person.handle],)) | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. by emitting within the for loop, a |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For consistency with
get_person_handles
, should this method be calledget_person_handles_cursor
?