1
+ import { CrawlImpl , CrawlSuccessResult } from '../type' ;
2
+ import { NetworkConnectionError , PageNotFoundError , TimeoutError } from '../utils/errorType' ;
3
+
4
/**
 * Page fields read from a crawl response. Every field is optional, so
 * consumers must handle their absence.
 */
interface Search1ApiCrawlResult {
  /** Text of the crawled page. */
  content?: string;
  /** URL reported for the page; presumably the resolved address (TODO confirm against the API docs). */
  link?: string;
  /** Title of the crawled page. */
  title?: string;
}

/**
 * Shape this module assumes for the JSON body returned by the Search1API
 * `/crawl` endpoint.
 */
interface Search1ApiResponse {
  /** Request parameters included in the response. */
  crawlParameters: { url: string };
  /** The crawled page data. */
  results: Search1ApiCrawlResult;
}
14
+
15
/** Maximum time, in milliseconds, to wait for the crawl request (10 seconds). */
const TIMEOUT_CONTROL = 10 * 1_000;
16
+
17
/**
 * Races `promise` against a timer and settles with whichever finishes first.
 *
 * The timer is cancelled as soon as the race settles, so a `promise` that
 * finishes early does not leave a pending timeout behind (which would
 * otherwise keep the event loop alive for the full `ms`).
 *
 * This helper only stops *waiting*: it has no handle on the work behind
 * `promise` and therefore does not cancel it.
 *
 * @param promise - The operation to wait for.
 * @param ms - Maximum time to wait, in milliseconds.
 * @returns The value `promise` resolves with, if it settles within `ms`.
 * @throws TimeoutError if `ms` elapses first; otherwise whatever `promise`
 *   rejects with.
 */
const withTimeout = <T>(promise: Promise<T>, ms: number): Promise<T> => {
  let timer: ReturnType<typeof setTimeout> | undefined;

  // This promise never resolves; it can only reject once the deadline passes.
  const timeoutPromise = new Promise<never>((_, reject) => {
    timer = setTimeout(() => {
      reject(new TimeoutError(`Request timeout after ${ms} ms`));
    }, ms);
  });

  return Promise.race([promise, timeoutPromise]).finally(() => {
    // Whichever side won, the timer has no further purpose.
    clearTimeout(timer);
  });
};
28
+
29
/**
 * Crawls a page through the Search1API `/crawl` endpoint.
 *
 * The crawl options supplied by the caller are not used by this
 * implementation.
 *
 * @param url - Address of the page to crawl.
 * @returns The crawled page, or `undefined` when the response carries no
 *   content, carries fewer than 100 characters of content, or cannot be
 *   turned into a result (in which case the error is logged).
 * @throws Error if `SEARCH1API_API_KEY` is not set, or if the API answers
 *   with a non-OK status other than 404.
 * @throws NetworkConnectionError if the request rejects with the message
 *   `fetch failed`.
 * @throws PageNotFoundError if the API answers with status 404.
 * @throws Any other rejection from the timed request (including the timeout
 *   raised by `withTimeout`) is rethrown unchanged.
 */
export const search1api: CrawlImpl = async (url) => {
  // Get API key from environment variable
  const apiKey = process.env.SEARCH1API_API_KEY;

  if (!apiKey) {
    throw new Error('SEARCH1API_API_KEY environment variable is not set');
  }

  // Gives us a way to cancel the HTTP request itself once we stop waiting for it.
  const controller = new AbortController();
  let res: Response;

  try {
    res = await withTimeout(
      fetch('https://api.search1api.com/crawl', {
        method: 'POST',
        headers: {
          'Authorization': `Bearer ${apiKey} `,
          'Content-Type': 'application/json',
        },
        body: JSON.stringify({ url }),
        signal: controller.signal,
      }),
      TIMEOUT_CONTROL,
    );
  } catch (e) {
    // We are done with this request whatever went wrong; abort it so a
    // request we gave up on does not keep running in the background.
    // Aborting a request that has already failed is a no-op.
    controller.abort();

    if (e instanceof Error && e.message === 'fetch failed') {
      throw new NetworkConnectionError();
    }

    throw e;
  }

  if (!res.ok) {
    if (res.status === 404) {
      throw new PageNotFoundError(res.statusText);
    }

    throw new Error(`Search1API request failed with status ${res.status} : ${res.statusText} `);
  }

  try {
    // The body is unvalidated JSON, so treat every level as possibly absent.
    const data = (await res.json()) as Partial<Search1ApiResponse> | null;
    const results = data?.results;

    // Check if content is empty or too short
    if (!results?.content || results.content.length < 100) {
      return;
    }

    return {
      content: results.content,
      contentType: 'text',
      title: results.title,
      description: results.title, // Using title as description since API doesn't provide a separate description
      length: results.content.length,
      siteName: new URL(url).hostname,
      url: results.link || url,
    } satisfies CrawlSuccessResult;
  } catch (error) {
    // Best effort: a malformed body or an unparsable `url` yields no result.
    console.error(error);
  }

  return;
};
0 commit comments