Skip to content

Commit f997fc6

Browse files
feat: DEV-3145: Add command for export project in Opensource (#2824)
* Create command to export project * Implement function to export project * Add unit tests * Add extra optional argument to path * Update unit tests * Remove expanded drafts from tasks data * Import EXPORT_DIR directly from base settings * Use path join to assert filepath * Update export documentation * Fixes * Avoid success message if export fails * Update unit tests * Update export documentation * Max fixes Co-authored-by: makseq-ubnt <[email protected]>
1 parent b85a4a5 commit f997fc6

File tree

7 files changed

+145
-2
lines changed

7 files changed

+145
-2
lines changed

docs/source/guide/export.md

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,20 @@ Use the following steps to export data and annotations from the Label Studio UI.
3636

3737
### Export timeout in Community Edition
3838

39-
If the export times out, see how to [export snapshots using the SDK](https://labelstud.io/sdk/project.html#label_studio_sdk.project.Project.export_snapshot_create) or [API](#Export-snapshots-using-the-API).
39+
If the export times out, see how to [export snapshots using the SDK](https://labelstud.io/sdk/project.html#label_studio_sdk.project.Project.export_snapshot_create) or [API](#Export-snapshots-using-the-API). You can also use a [console command](#Export-using-console-command) to export your project. For more information, see the following section.
40+
41+
### Export using console command
42+
43+
Use the following command to export data and annotations.
44+
45+
```shell
46+
label-studio export <project-id> <export-format> --export-path=<output-path>
47+
```
48+
49+
To enable logs:
50+
```shell
51+
DEBUG=1 LOG_LEVEL=DEBUG label-studio export <project-id> <export-format> --export-path=<output-path>
52+
```
4053

4154
### Export all tasks including tasks without annotations
4255

label_studio/core/argparser.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import os
44
import json
55

6+
from .settings.base import EXPORT_DIR
67
from .utils.io import find_file
78

89

@@ -133,6 +134,18 @@ def project_name(raw_name):
133134
'--from-scratch', dest='from_scratch', default=False, action='store_true', help='Recalculate from scratch'
134135
)
135136

137+
# export_project sub-command parser
138+
export_project = subparsers.add_parser('export', help='Export project in a specific format', parents=[root_parser])
139+
export_project.add_argument('project_id', help='Project ID')
140+
export_project.add_argument('export_format', help='Export format (JSON, JSON_MIN, CSV, etc)')
141+
export_project.add_argument('--export-path', help='Export file path or directory', default=EXPORT_DIR)
142+
default_params = '{"annotations__completed_by": {"only_id": null}, "interpolate_key_frames": true}'
143+
export_project.add_argument(
144+
'--export-serializer-context',
145+
help=f"Export serializer context, default value: '{default_params}'",
146+
default=default_params
147+
)
148+
136149
args = parser.parse_args(input_args)
137150

138151
if not hasattr(args, 'label_config'):

label_studio/data_export/mixins.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,8 @@ def _get_filtered_annotations_queryset(self, annotation_filter_options=None):
115115
q = reduce(lambda x, y: x | y, q_list)
116116
return queryset.filter(q)
117117

118-
def _get_export_serializer_option(self, serialization_options):
118+
@staticmethod
119+
def _get_export_serializer_option(serialization_options):
119120
options = {'expand': []}
120121
if isinstance(serialization_options, dict):
121122
if (

label_studio/server.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -306,6 +306,21 @@ def main():
306306
calculate_stats_all_orgs(input_args.from_scratch, redis=True)
307307
return
308308

309+
if input_args.command == 'export':
310+
from tasks.functions import export_project
311+
312+
try:
313+
filename = export_project(
314+
input_args.project_id, input_args.export_format, input_args.export_path,
315+
serializer_context=input_args.export_serializer_context
316+
)
317+
except Exception as e:
318+
logger.exception(f'Failed to export project: {e}')
319+
else:
320+
logger.info(f'Project exported successfully: {filename}')
321+
322+
return
323+
309324
# print version
310325
if input_args.command == 'version' or input_args.version:
311326
from label_studio import __version__

label_studio/tasks/functions.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,19 @@
1+
import os
12
import sys
23
import logging
4+
import json
5+
6+
from django.conf import settings
37

48
from core.models import AsyncMigrationStatus
59
from core.redis import start_job_async_or_sync
10+
from core.utils.common import batch
11+
from data_export.models import DataExport
12+
from data_export.serializers import ExportDataSerializer
613
from organizations.models import Organization
714
from projects.models import Project
15+
from tasks.models import Task
16+
from data_export.mixins import ExportMixin
817

918

1019
def calculate_stats_all_orgs(from_scratch, redis):
@@ -63,3 +72,49 @@ def redis_job_for_calculation(org, from_scratch):
6372
f"End processing counters for project <{project.title}> ({project.id}), "
6473
f"processed {str(task_count)} tasks"
6574
)
75+
76+
77+
def export_project(project_id, export_format, path, serializer_context=None):
    """Export every task of a project to a file in the requested format.

    Args:
        project_id: Primary key of the project to export.
        export_format: Name of the export format; it is upper-cased and then
            checked against the names returned by
            ``DataExport.get_export_formats(project)``.
        path: Either an existing directory (the generated filename is
            appended to it) or the full path of the output file.
        serializer_context: Optional serializer options, given as a dict or
            as a JSON-encoded string.

    Returns:
        The path of the file the export was written to.

    Raises:
        Project.DoesNotExist: if there is no project with ``project_id``.
        ValueError: if ``export_format`` is not supported for the project,
            or if ``serializer_context`` is a string that is not valid JSON.
    """
    logger = logging.getLogger(__name__)

    project = Project.objects.get(id=project_id)

    export_format = export_format.upper()
    supported_formats = [s['name'] for s in DataExport.get_export_formats(project)]
    if export_format not in supported_formats:
        # Raise explicitly instead of using `assert`: assertions are removed
        # when Python runs with -O, which would silently skip this validation.
        raise ValueError(f'Export format is not supported, please use {supported_formats}')

    tasks_queryset = (
        Task.objects.filter(project=project)
        .select_related("project")
        .prefetch_related("annotations", "predictions")
    )

    # Only pay for the COUNT query when the message will actually be emitted.
    if logger.isEnabledFor(logging.DEBUG):
        logger.debug(
            "Start exporting project <%s> (%s) with task count %s.",
            project.title, project.id, tasks_queryset.count(),
        )

    # serializer context: accept the raw JSON string coming from the command line
    if isinstance(serializer_context, str):
        serializer_context = json.loads(serializer_context)
    serializer_options = ExportMixin._get_export_serializer_option(serializer_context)

    # export cycle: serialize in batches of 1000 tasks and collect the results
    tasks = []
    for tasks_batch in batch(tasks_queryset, 1000):
        tasks.extend(
            ExportDataSerializer(
                tasks_batch,
                many=True,
                **serializer_options
            ).data
        )

    # convert to output format
    export_stream, _, filename = DataExport.generate_export_file(
        project, tasks, export_format, settings.CONVERTER_DOWNLOAD_RESOURCES, {}
    )

    # write to file: a directory gets the generated filename appended,
    # anything else is used as the target file path as-is
    filepath = os.path.join(path, filename) if os.path.isdir(path) else path
    with open(filepath, "wb") as file:
        file.write(export_stream.read())

    logger.debug(
        "End exporting project <%s> (%s) in %s format.",
        project.title, project.id, export_format,
    )

    return filepath

label_studio/tests/tasks/__init__.py

Whitespace-only changes.
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
import io
2+
import os
3+
import pytest
4+
5+
from django.conf import settings
6+
7+
from data_export.serializers import ExportDataSerializer
8+
from tasks.functions import export_project
9+
10+
pytestmark = pytest.mark.django_db
11+
12+
13+
class TestExportProject:
    """Unit tests for ``tasks.functions.export_project``."""

    @pytest.fixture
    def generate_export_file(self, mocker):
        """Replace the format conversion step with a stub returning a fixed stream."""
        return mocker.patch(
            "tasks.functions.DataExport.generate_export_file",
            return_value=(io.BytesIO(b"stream"), "application/json", "project.json"),
        )

    @pytest.fixture
    def project(self, configured_project):
        """Project under test, taken from the ``configured_project`` fixture."""
        return configured_project

    def test_export_project(self, mocker, generate_export_file, project):
        """The export is written under the given directory with the serialized tasks."""
        data = ExportDataSerializer(
            project.tasks.all(),
            many=True,
            context={"interpolate_key_frames": settings.INTERPOLATE_KEY_FRAMES},
        ).data

        # pytest-mock patches are active as soon as they are created and are
        # undone at teardown; they are not meant to be used as context managers.
        mocker.patch("builtins.open")
        filepath = export_project(project.id, "JSON", settings.EXPORT_DIR)

        assert filepath == os.path.join(settings.EXPORT_DIR, "project.json")

        generate_export_file.assert_called_once_with(
            project, data, "JSON", settings.CONVERTER_DOWNLOAD_RESOURCES, {}
        )

    def test_project_does_not_exist(self, mocker, generate_export_file):
        """Exporting a missing project fails before any export file is generated."""
        # Model.DoesNotExist subclasses ObjectDoesNotExist; matching on it keeps
        # the test from passing on unrelated errors.
        from django.core.exceptions import ObjectDoesNotExist

        mocker.patch("builtins.open")
        with pytest.raises(ObjectDoesNotExist):
            export_project(1, "JSON", settings.EXPORT_DIR)

        generate_export_file.assert_not_called()

0 commit comments

Comments
 (0)