Skip to content

Commit f03f152

Browse files
committed
[Personalize] Truncating all string columns
1 parent d0c0def commit f03f152

File tree

3 files changed

+6
-25
lines changed

3 files changed

+6
-25
lines changed

src/analytics/constants/personalize_constants.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131

3232
# Limits for fields to prevent data bloat
3333
MAX_HUB_IDS = 20
34-
MAX_TITLE_LENGTH = 950 # Character limit for TITLE field
3534

3635
# CSV Headers (in order for the CSV file)
3736
CSV_HEADERS = [
@@ -95,7 +94,8 @@
9594
]
9695

9796
# Text field maximum length (to prevent CSV cell overflow)
98-
MAX_TEXT_LENGTH = 10000
97+
# Applied to both TITLE and TEXT fields
98+
MAX_TEXT_LENGTH = 950
9999

100100
# ITEM_TYPE mapping for Personalize export
101101
# Maps internal document_type to Personalize-friendly type names

src/analytics/items/personalize_item_mapper.py

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,10 +28,7 @@
2828
TWEET_COUNT_TOTAL,
2929
UPVOTE_SCORE,
3030
)
31-
from analytics.utils.personalize_item_utils import (
32-
prepare_text_for_personalize,
33-
prepare_title,
34-
)
31+
from analytics.utils.personalize_item_utils import prepare_text_for_personalize
3532
from utils.time import datetime_to_epoch_seconds
3633

3734

@@ -171,7 +168,7 @@ def _map_paper_fields(
171168
text_concat = f"{title} {abstract} {hub_names}"
172169

173170
fields = {
174-
TITLE: prepare_title(title),
171+
TITLE: prepare_text_for_personalize(title),
175172
TEXT: prepare_text_for_personalize(text_concat),
176173
CITATION_COUNT_TOTAL: paper.citations if paper.citations is not None else 0,
177174
}
@@ -193,6 +190,6 @@ def _map_post_fields(self, prefetched_doc: PrefetchedUnifiedDocument, post) -> d
193190
text_concat = f"{title} {renderable_text} {hub_names}"
194191

195192
return {
196-
TITLE: prepare_title(title),
193+
TITLE: prepare_text_for_personalize(title),
197194
TEXT: prepare_text_for_personalize(text_concat),
198195
}

src/analytics/utils/personalize_item_utils.py

Lines changed: 1 addition & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import re
66
from typing import Optional
77

8-
from analytics.constants.personalize_constants import MAX_TEXT_LENGTH, MAX_TITLE_LENGTH
8+
from analytics.constants.personalize_constants import MAX_TEXT_LENGTH
99

1010

1111
def prepare_text_for_personalize(text: Optional[str]) -> Optional[str]:
@@ -30,19 +30,3 @@ def prepare_text_for_personalize(text: Optional[str]) -> Optional[str]:
3030
text = text[:MAX_TEXT_LENGTH]
3131

3232
return text if text else None
33-
34-
35-
def prepare_title(title: Optional[str]) -> Optional[str]:
36-
"""
37-
Prepare title for CSV export with length limit.
38-
39-
Cleans HTML, normalizes whitespace, and limits to MAX_TITLE_LENGTH.
40-
"""
41-
# Use existing text preparation logic
42-
prepared = prepare_text_for_personalize(title)
43-
44-
# Apply title-specific length limit
45-
if prepared and len(prepared) > MAX_TITLE_LENGTH:
46-
prepared = prepared[:MAX_TITLE_LENGTH]
47-
48-
return prepared

0 commit comments

Comments
 (0)