Skip to content

Commit 83392e2

Browse files
committed
Update CI things
1 parent a24df16 commit 83392e2

File tree

3 files changed

+59
-25
lines changed

3 files changed

+59
-25
lines changed

genealogy/management/commands/demo_ocr.py

Lines changed: 26 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -6,12 +6,11 @@
66
python manage.py demo_ocr --clear # Clear previous demo data first
77
"""
88

9-
import os
109
from pathlib import Path
1110

11+
from django.conf import settings
1212
from django.core.files import File
1313
from django.core.management.base import BaseCommand
14-
from django.conf import settings
1514

1615
from genealogy.models import Document, DocumentPage
1716
from genealogy.tasks import process_page_ocr
@@ -28,7 +27,7 @@ def add_arguments(self, parser):
2827
)
2928
parser.add_argument(
3029
"--sync",
31-
action="store_true",
30+
action="store_true",
3231
help="Run OCR synchronously instead of using Celery tasks",
3332
)
3433

@@ -95,18 +94,17 @@ def _clear_demo_data(self):
9594
def _create_demo_document(self, file_path: Path, description: str) -> Document:
9695
"""Create a document for demo purposes"""
9796
title = f"Demo: {file_path.stem} - {description}"
98-
99-
document = Document.objects.create(
97+
98+
return Document.objects.create(
10099
title=title,
101100
languages="eng", # Default to English for demo
102101
)
103-
return document
104102

105103
def _create_pages_for_document(self, document: Document, file_path: Path) -> int:
106104
"""Create document pages from the PDF file"""
107105
with open(file_path, "rb") as f:
108106
django_file = File(f, name=file_path.name)
109-
107+
110108
# For demo, treat each PDF as a single page
111109
# In reality, the admin interface would handle multi-page PDFs
112110
page = DocumentPage.objects.create(
@@ -115,65 +113,71 @@ def _create_pages_for_document(self, document: Document, file_path: Path) -> int
115113
image_file=django_file,
116114
original_filename=file_path.name,
117115
)
118-
116+
119117
return 1
120118

121119
def _process_ocr_sync(self, document: Document):
122120
"""Process OCR synchronously for immediate results"""
123121
self.stdout.write(" 🔄 Processing OCR (synchronous)...")
124-
122+
125123
for page in document.pages.all():
126124
try:
127125
page.validate_for_ocr()
128-
126+
129127
# Import here to avoid import issues
130128
from genealogy.ocr_processor import OCRProcessor
131-
129+
132130
processor = OCRProcessor()
133131
file_path = page.image_file.path
134-
132+
135133
text, confidence, rotation = processor.process_file(file_path)
136-
134+
137135
page.ocr_text = text
138136
page.ocr_confidence = confidence
139137
page.rotation_applied = rotation
140138
page.ocr_completed = True
141139
page.save()
142-
140+
143141
self.stdout.write(
144142
f" ✅ OCR complete - {confidence:.1f}% confidence, "
145143
f"{len(text)} characters extracted"
146144
)
147-
145+
148146
# Show first 100 characters of extracted text
149147
preview = text[:100].replace("\n", " ").strip()
150148
if len(text) > 100:
151149
preview += "..."
152150
self.stdout.write(f" 📝 Preview: {preview}")
153-
151+
154152
except Exception as e:
155153
self.stdout.write(
156-
self.style.ERROR(f" ❌ OCR failed for page {page.page_number}: {e}")
154+
self.style.ERROR(
155+
f" ❌ OCR failed for page {page.page_number}: {e}"
156+
)
157157
)
158158

159159
def _process_ocr_async(self, document: Document):
160160
"""Process OCR using Celery tasks"""
161161
self.stdout.write(" 🔄 Queuing OCR tasks (asynchronous)...")
162-
162+
163163
task_count = 0
164164
for page in document.pages.all():
165165
try:
166166
page.validate_for_ocr()
167167
task = process_page_ocr.delay(str(page.id))
168168
task_count += 1
169-
self.stdout.write(f" 📋 Queued OCR task {task.id} for page {page.page_number}")
169+
self.stdout.write(
170+
f" 📋 Queued OCR task {task.id} for page {page.page_number}"
171+
)
170172
except Exception as e:
171173
self.stdout.write(
172-
self.style.ERROR(f" ❌ Failed to queue OCR for page {page.page_number}: {e}")
174+
self.style.ERROR(
175+
f" ❌ Failed to queue OCR for page {page.page_number}: {e}"
176+
)
173177
)
174-
178+
175179
if task_count > 0:
176180
self.stdout.write(
177181
f" ⏱️ {task_count} OCR task(s) queued. "
178182
"Check the admin interface to see results as they complete."
179-
)
183+
)

genealogy_extractor/settings.py

Lines changed: 31 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@
1111
"""
1212

1313
import os
14+
import sys
1415
from pathlib import Path
1516

1617
# Build paths inside the project like this: BASE_DIR / 'subdir'.
@@ -21,7 +22,10 @@
2122
# See https://docs.djangoproject.com/en/5.2/howto/deployment/checklist/
2223

2324
# SECURITY WARNING: keep the secret key used in production secret!
24-
SECRET_KEY = os.getenv("DJANGO_SECRET_KEY", "django-insecure-*+%i+f9ifcd+hnu==udvxo_qc03_7x8y&nijs6)kp#5u!cct%u")
25+
SECRET_KEY = os.getenv(
26+
"DJANGO_SECRET_KEY",
27+
"django-insecure-*+%i+f9ifcd+hnu==udvxo_qc03_7x8y&nijs6)kp#5u!cct%u",
28+
)
2529

2630
# SECURITY WARNING: don't run with debug turned on in production!
2731
DEBUG = os.getenv("DEBUG", "False").lower() in ("true", "1", "yes")
@@ -136,7 +140,9 @@
136140

137141
# Celery configuration
138142
CELERY_BROKER_URL = f"redis://{os.environ['REDIS_HOST']}:{os.environ['REDIS_PORT']}/0"
139-
CELERY_RESULT_BACKEND = f"redis://{os.environ['REDIS_HOST']}:{os.environ['REDIS_PORT']}/0"
143+
CELERY_RESULT_BACKEND = (
144+
f"redis://{os.environ['REDIS_HOST']}:{os.environ['REDIS_PORT']}/0"
145+
)
140146
CELERY_ACCEPT_CONTENT = ["json"]
141147
CELERY_TASK_SERIALIZER = "json"
142148
CELERY_RESULT_SERIALIZER = "json"
@@ -146,3 +152,26 @@
146152
# https://docs.djangoproject.com/en/5.2/ref/settings/#default-auto-field
147153

148154
DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField"
155+
156+
# Logging configuration
157+
LOGGING = {
158+
"version": 1,
159+
"disable_existing_loggers": False,
160+
"handlers": {
161+
"console": {
162+
"class": "logging.StreamHandler",
163+
"stream": sys.stdout,
164+
},
165+
},
166+
"root": {
167+
"handlers": ["console"],
168+
"level": "WARNING" if "test" in sys.argv else "INFO",
169+
},
170+
"loggers": {
171+
"genealogy": {
172+
"handlers": ["console"],
173+
"level": "WARNING" if "test" in sys.argv else "INFO",
174+
"propagate": False,
175+
},
176+
},
177+
}

requirements-dev.txt

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,3 +1,4 @@
11
ruff==0.1.9
22
mypy==1.8.0
3-
bandit==1.7.5
3+
bandit==1.7.5
4+
django-stubs==4.2.7

0 commit comments

Comments
 (0)