Skip to content

Commit cf3ffff

Browse files
committed
feat: Allow configuring inline asset size threshold
1 parent 2b769cb commit cf3ffff

File tree

5 files changed

+32
-31
lines changed

5 files changed

+32
-31
lines changed

apps/workers/constants.ts

Lines changed: 0 additions & 1 deletion
This file was deleted.

apps/workers/workers/adminMaintenance/tasks/migrateLinkHtmlContent.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,10 @@ import {
1111
newAssetId,
1212
saveAsset,
1313
} from "@karakeep/shared/assetdb";
14+
import serverConfig from "@karakeep/shared/config";
1415
import logger from "@karakeep/shared/logger";
1516
import { tryCatch } from "@karakeep/shared/tryCatch";
1617

17-
import { HTML_CONTENT_SIZE_THRESHOLD } from "../../../constants";
1818
import { updateAsset } from "../../../workerUtils";
1919

2020
const BATCH_SIZE = 25;
@@ -40,12 +40,12 @@ async function getBookmarksWithLargeInlineHtml(limit: number, cursor?: string) {
4040
gt(bookmarkLinks.id, cursor),
4141
isNotNull(bookmarkLinks.htmlContent),
4242
isNull(bookmarkLinks.contentAssetId),
43-
sql`length(CAST(${bookmarkLinks.htmlContent} AS BLOB)) > ${HTML_CONTENT_SIZE_THRESHOLD}`,
43+
sql`length(CAST(${bookmarkLinks.htmlContent} AS BLOB)) > ${serverConfig.crawler.htmlContentSizeThreshold}`,
4444
)
4545
: and(
4646
isNotNull(bookmarkLinks.htmlContent),
4747
isNull(bookmarkLinks.contentAssetId),
48-
sql`length(CAST(${bookmarkLinks.htmlContent} AS BLOB)) > ${HTML_CONTENT_SIZE_THRESHOLD}`,
48+
sql`length(CAST(${bookmarkLinks.htmlContent} AS BLOB)) > ${serverConfig.crawler.htmlContentSizeThreshold}`,
4949
),
5050
)
5151
.orderBy(asc(bookmarkLinks.id))
@@ -62,7 +62,7 @@ async function migrateBookmarkHtml(
6262

6363
const contentSize = Buffer.byteLength(htmlContent, "utf8");
6464

65-
if (contentSize <= HTML_CONTENT_SIZE_THRESHOLD) {
65+
if (contentSize <= serverConfig.crawler.htmlContentSizeThreshold) {
6666
logger.debug(
6767
`[adminMaintenance:migrate_large_link_html][${jobId}] Bookmark ${bookmarkId} inline HTML (${contentSize} bytes) below threshold, skipping`,
6868
);

apps/workers/workers/crawlerWorker.ts

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,6 @@ import {
7373
import { tryCatch } from "@karakeep/shared/tryCatch";
7474
import { BookmarkTypes } from "@karakeep/shared/types/bookmarks";
7575

76-
import { HTML_CONTENT_SIZE_THRESHOLD } from "../constants";
7776
import metascraperReddit from "../metascraper-plugins/metascraper-reddit";
7877

7978
function abortPromise(signal: AbortSignal): Promise<never> {
@@ -934,7 +933,7 @@ async function storeHtmlContent(
934933
const contentSize = contentBuffer.byteLength;
935934

936935
// Only store in assets if content size is >= the configured threshold (serverConfig.crawler.htmlContentSizeThreshold)
937-
if (contentSize < HTML_CONTENT_SIZE_THRESHOLD) {
936+
if (contentSize < serverConfig.crawler.htmlContentSizeThreshold) {
938937
logger.info(
939938
`[Crawler][${jobId}] HTML content size (${contentSize} bytes) is below threshold, storing inline`,
940939
);

0 commit comments

Comments
 (0)