1
1
from typing import Any
2
2
3
3
from mcp .server .fastmcp import Context , FastMCP
4
+ from mcp .types import ToolAnnotations
4
5
5
6
from oxylabs_mcp import url_params
6
7
from oxylabs_mcp .config import settings
7
8
from oxylabs_mcp .exceptions import MCPServerError
8
- from oxylabs_mcp .utils import (
9
- convert_html_to_md ,
10
- get_content ,
11
- oxylabs_client ,
12
- strip_html ,
13
- )
9
+ from oxylabs_mcp .utils import get_content , oxylabs_client
14
10
15
11
16
# Module-level FastMCP server instance; the scraper tools below register
# themselves against it via the @mcp.tool decorator.
mcp = FastMCP("oxylabs_mcp")
17
13
18
14
19
- @mcp .tool (
20
- name = "oxylabs_universal_scraper" ,
21
- description = "Scrape url using Oxylabs Web API with universal scraper" ,
22
- )
23
- async def scrape_universal_url (
15
@mcp.tool(annotations=ToolAnnotations(readOnlyHint=True))
async def universal_scraper(
    ctx: Context,  # type: ignore[type-arg]
    url: url_params.URL_PARAM,
    render: url_params.RENDER_PARAM = "",
    user_agent_type: url_params.USER_AGENT_TYPE_PARAM = "",
    geo_location: url_params.GEO_LOCATION_PARAM = "",
    output_format: url_params.OUTPUT_FORMAT_PARAM = "",
) -> str:
    """Get a content of any webpage.

    Supports browser rendering, parsing of certain webpages
    and different output formats.
    """
    # Start from the mandatory field, then fold in only those optional
    # parameters the caller actually provided (empty strings are omitted,
    # matching the Web API's expectation of absent-rather-than-blank keys).
    optional_fields = {
        "render": render,
        "user_agent_type": user_agent_type,
        "geo_location": geo_location,
    }
    request_body: dict[str, Any] = {"url": url}
    request_body.update({key: value for key, value in optional_fields.items() if value})

    try:
        async with oxylabs_client(ctx) as client:
            response = await client.post(settings.OXYLABS_SCRAPER_URL, json=request_body)
            response.raise_for_status()
            return get_content(response, output_format=output_format)
    except MCPServerError as e:
        # Known server-side failures are reported back as a readable string
        # instead of propagating to the MCP runtime.
        return e.stringify()
74
47
75
48
76
- @mcp .tool (
77
- name = "oxylabs_google_search_scraper" ,
78
- description = "Scrape Google Search results using Oxylabs Web API" ,
79
- )
80
- async def scrape_google_search (
49
+ @mcp .tool (annotations = ToolAnnotations (readOnlyHint = True ))
50
+ async def google_search_scraper (
81
51
ctx : Context , # type: ignore[type-arg]
82
52
query : url_params .GOOGLE_QUERY_PARAM ,
83
53
parse : url_params .PARSE_PARAM = True , # noqa: FBT002
@@ -90,10 +60,15 @@ async def scrape_google_search(
90
60
geo_location : url_params .GEO_LOCATION_PARAM = "" ,
91
61
locale : url_params .LOCALE_PARAM = "" ,
92
62
ad_mode : url_params .AD_MODE_PARAM = False , # noqa: FBT002
63
+ output_format : url_params .OUTPUT_FORMAT_PARAM = "" ,
93
64
) -> str :
94
- """Scrape Google Search results using Oxylabs Web API."""
65
+ """Scrape Google Search results.
66
+
67
+ Supports content parsing, different user agent types, pagination,
68
+ domain, geolocation, locale parameters and different output formats.
69
+ """
95
70
try :
96
- async with oxylabs_client (ctx , with_auth = True ) as client :
71
+ async with oxylabs_client (ctx ) as client :
97
72
payload : dict [str , Any ] = {"query" : query }
98
73
99
74
if ad_mode :
@@ -124,16 +99,13 @@ async def scrape_google_search(
124
99
125
100
response .raise_for_status ()
126
101
127
- return get_content (response , parse )
102
+ return get_content (response , parse = parse , output_format = output_format )
128
103
except MCPServerError as e :
129
104
return e .stringify ()
130
105
131
106
132
- @mcp .tool (
133
- name = "oxylabs_amazon_search_scraper" ,
134
- description = "Scrape Amazon Search results using Oxylabs Web API" ,
135
- )
136
- async def scrape_amazon_search (
107
+ @mcp .tool (annotations = ToolAnnotations (readOnlyHint = True ))
108
+ async def amazon_search_scraper (
137
109
ctx : Context , # type: ignore[type-arg]
138
110
query : url_params .AMAZON_SEARCH_QUERY_PARAM ,
139
111
category_id : url_params .CATEGORY_ID_CONTEXT_PARAM = "" ,
@@ -147,10 +119,16 @@ async def scrape_amazon_search(
147
119
domain : url_params .DOMAIN_PARAM = "" ,
148
120
geo_location : url_params .GEO_LOCATION_PARAM = "" ,
149
121
locale : url_params .LOCALE_PARAM = "" ,
122
+ output_format : url_params .OUTPUT_FORMAT_PARAM = "" ,
150
123
) -> str :
151
- """Scrape Amazon Search results using Oxylabs Web API."""
124
+ """Scrape Amazon search results.
125
+
126
+ Supports content parsing, different user agent types, pagination,
127
+ domain, geolocation, locale parameters and different output formats.
128
+ Supports Amazon specific parameters such as category id, merchant id, currency.
129
+ """
152
130
try :
153
- async with oxylabs_client (ctx , with_auth = True ) as client :
131
+ async with oxylabs_client (ctx ) as client :
154
132
payload : dict [str , Any ] = {"source" : "amazon_search" , "query" : query }
155
133
156
134
context = []
@@ -184,16 +162,13 @@ async def scrape_amazon_search(
184
162
185
163
response .raise_for_status ()
186
164
187
- return get_content (response , parse )
165
+ return get_content (response , parse = parse , output_format = output_format )
188
166
except MCPServerError as e :
189
167
return e .stringify ()
190
168
191
169
192
- @mcp .tool (
193
- name = "oxylabs_amazon_product_scraper" ,
194
- description = "Scrape Amazon Products using Oxylabs Web API" ,
195
- )
196
- async def scrape_amazon_products (
170
+ @mcp .tool (annotations = ToolAnnotations (readOnlyHint = True ))
171
+ async def amazon_product_scraper (
197
172
ctx : Context , # type: ignore[type-arg]
198
173
query : url_params .AMAZON_SEARCH_QUERY_PARAM ,
199
174
autoselect_variant : url_params .AUTOSELECT_VARIANT_CONTEXT_PARAM = False , # noqa: FBT002
@@ -204,10 +179,17 @@ async def scrape_amazon_products(
204
179
domain : url_params .DOMAIN_PARAM = "" ,
205
180
geo_location : url_params .GEO_LOCATION_PARAM = "" ,
206
181
locale : url_params .LOCALE_PARAM = "" ,
182
+ output_format : url_params .OUTPUT_FORMAT_PARAM = "" ,
207
183
) -> str :
208
- """Scrape Amazon Products using Oxylabs Web API."""
184
+ """Scrape Amazon products.
185
+
186
+ Supports content parsing, different user agent types, domain,
187
+ geolocation, locale parameters and different output formats.
188
+ Supports Amazon specific parameters such as currency and getting
189
+ more accurate pricing data with auto select variant.
190
+ """
209
191
try :
210
- async with oxylabs_client (ctx , with_auth = True ) as client :
192
+ async with oxylabs_client (ctx ) as client :
211
193
payload : dict [str , Any ] = {"source" : "amazon_product" , "query" : query }
212
194
213
195
context = []
@@ -235,7 +217,7 @@ async def scrape_amazon_products(
235
217
236
218
response .raise_for_status ()
237
219
238
- return get_content (response , parse )
220
+ return get_content (response , parse = parse , output_format = output_format )
239
221
except MCPServerError as e :
240
222
return e .stringify ()
241
223
0 commit comments