|
| 1 | +import os |
1 | 2 | import sys |
2 | 3 | import logging |
| 4 | +import json |
| 5 | + |
| 6 | +from django.conf import settings |
3 | 7 |
|
4 | 8 | from core.models import AsyncMigrationStatus |
5 | 9 | from core.redis import start_job_async_or_sync |
| 10 | +from core.utils.common import batch |
| 11 | +from data_export.models import DataExport |
| 12 | +from data_export.serializers import ExportDataSerializer |
6 | 13 | from organizations.models import Organization |
7 | 14 | from projects.models import Project |
| 15 | +from tasks.models import Task |
| 16 | +from data_export.mixins import ExportMixin |
8 | 17 |
|
9 | 18 |
|
10 | 19 | def calculate_stats_all_orgs(from_scratch, redis): |
@@ -63,3 +72,49 @@ def redis_job_for_calculation(org, from_scratch): |
63 | 72 | f"End processing counters for project <{project.title}> ({project.id}), " |
64 | 73 | f"processed {str(task_count)} tasks" |
65 | 74 | ) |
| 75 | + |
| 76 | + |
def export_project(project_id, export_format, path, serializer_context=None):
    """Export every task of a project to a file in the requested format.

    Args:
        project_id: Id used to look up the ``Project`` to export.
        export_format: Name of the export format. It is upper-cased before
            being compared with the names returned by
            ``DataExport.get_export_formats(project)``.
        path: Output location. When it is an existing directory, the filename
            produced by the exporter is appended to it; otherwise it is used
            as the full path of the output file.
        serializer_context: Optional serializer context. A ``str`` value is
            decoded as JSON first; the result is handed to
            ``ExportMixin._get_export_serializer_option``.

    Returns:
        The path of the file that was written.

    Raises:
        AssertionError: If ``export_format`` is not one of the formats
            supported for the project.
    """
    logger = logging.getLogger(__name__)

    project = Project.objects.get(id=project_id)

    export_format = export_format.upper()
    supported_formats = [s['name'] for s in DataExport.get_export_formats(project)]
    if export_format not in supported_formats:
        # Raised explicitly instead of via an ``assert`` statement so the
        # check still runs under ``python -O``; the exception type is kept
        # as AssertionError for callers that already handle it.
        raise AssertionError(f'Export format is not supported, please use {supported_formats}')

    # This is a queryset of Task objects (not ids); related rows are
    # prefetched because the serializer below reads them for every task.
    task_queryset = (
        Task.objects.filter(project=project)
        .select_related("project")
        .prefetch_related("annotations", "predictions")
    )

    # Only pay for the COUNT query when the message will actually be emitted.
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug(
            f"Start exporting project <{project.title}> ({project.id}) with task count {task_queryset.count()}."
        )

    # serializer context
    if isinstance(serializer_context, str):
        serializer_context = json.loads(serializer_context)
    serializer_options = ExportMixin._get_export_serializer_option(serializer_context)

    # export cycle: serialize in chunks of 1000 tasks and accumulate the results
    tasks = []
    for task_chunk in batch(task_queryset, 1000):
        serialized_chunk = ExportDataSerializer(task_chunk, many=True, **serializer_options).data
        tasks.extend(serialized_chunk)

    # convert to output format
    export_stream, _, filename = DataExport.generate_export_file(
        project, tasks, export_format, settings.CONVERTER_DOWNLOAD_RESOURCES, {}
    )

    # write to file
    filepath = os.path.join(path, filename) if os.path.isdir(path) else path
    with open(filepath, "wb") as file:
        file.write(export_stream.read())

    logger.debug(f"End exporting project <{project.title}> ({project.id}) in {export_format} format.")

    return filepath
0 commit comments