Skip to content

Add changelog, add e2e tests, update tools, remove WebUnblocker #16

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into the base branch from the feature branch on
May 14, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/lint_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,4 +37,4 @@ jobs:

- name: Run tests
run: |
uv run pytest --cov=src --cov-report xml --cov-report term --cov-fail-under=90 ./tests
uv run pytest --cov=src --cov-report xml --cov-report term --cov-fail-under=90 tests/unit tests/integration
4 changes: 2 additions & 2 deletions .github/workflows/publish_to_pypi.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ name: Publish Python 🐍 distributions 📦 to PyPI

on:
push:
branches: [ "main" ]

tags:
- 'v[0-9]+.[0-9]+.[0-9]+'
jobs:
build-n-publish:
name: Build and publish Python distribution to PyPI
Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ ipython_config.py
__pypackages__/

# Environments
.env
*/.env
.venv
env/
venv/
Expand Down
20 changes: 20 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Changelog

## [0.2.0] - 2025-05-13

### Added

- Changelog
- E2E tests
- Geolocation and User Agent type parameters to universal scraper

### Changed

- Descriptions for tools
- Descriptions for tool parameters
- Default values for tool parameters

### Removed

- WebUnblocker tool
- Parse parameter for universal scraper
11 changes: 8 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ virtualenv_dir ?= .venv

.PHONY: install_deps
install_deps: $(virtualenv_dir)
uv sync
uv sync --group dev

.PHONY: lint
lint: install_deps
Expand All @@ -22,11 +22,16 @@ format: $(virtualenv_dir)

.PHONY: test
test: install_deps
uv run pytest --cov=src --cov-report xml --cov-report term --cov-fail-under=90 ./tests
uv run pytest --cov=src --cov-report xml --cov-report term --cov-fail-under=90 tests/unit tests/integration

.PHONY: test-e2e
test-e2e:
uv sync --group dev --group e2e-tests
uv run pytest --cov=src --cov-report xml --cov-report term tests/e2e

.PHONY: run
run: install_deps
npx @modelcontextprotocol/inspector@0.3.0 \
npx @modelcontextprotocol/inspector \
uv \
--directory $(current_dir) \
run \
Expand Down
10 changes: 0 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -239,16 +239,6 @@ make run
```
Then access MCP Inspector at `http://localhost:5173`. You may need to add your username and password as environment variables in the inspector under `OXYLABS_USERNAME` and `OXYLABS_PASSWORD`.


## 🛠️ Technical Details

This server provides two main tools:

1. **oxylabs_scraper**: Uses Oxylabs Web Scraper API for general website scraping
2. **oxylabs_web_unblocker**: Uses Oxylabs Web Unblocker for hard-to-access websites

[Web Scraper API](https://oxylabs.io/products/scraper-api/web) supports JavaScript rendering, parsed structured data, and cleaned HTML in Markdown format. [Web Unblocker](https://oxylabs.io/products/web-unblocker) offers JavaScript rendering and cleaned HTML, but doesn’t return parsed data.

---

## License
Expand Down
14 changes: 11 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "oxylabs-mcp"
version = "0.1.7"
version = "0.2.0"
description = "Oxylabs MCP server"
authors = [
{name="Augis Braziunas", email="[email protected]"},
Expand All @@ -24,7 +24,7 @@ dependencies = [
"lxml>=5.3.0",
"lxml-html-clean>=0.4.1",
"markdownify>=0.14.1",
"mcp[cli]>=1.6.0",
"mcp[cli]>=1.8.0",
"pydantic>=2.10.5",
"pydantic-settings>=2.8.1",
]
Expand All @@ -40,6 +40,12 @@ dev = [
"pytest-mock>=3.14.0",
"ruff>=0.9.1",
]
e2e-tests = [
"agno>=1.4.5",
"anthropic>=0.50.0",
"google-genai>=1.13.0",
"openai>=1.77.0",
]

[build-system]
requires = ["hatchling"]
Expand Down Expand Up @@ -89,7 +95,8 @@ lint.ignore = [
]

[tool.ruff.lint.per-file-ignores]
"tests/*" = ["D", "S101", "ARG001", "ANN", "PT011", "FBT"]
"tests/*" = ["D", "S101", "ARG001", "ANN", "PT011", "FBT", "PLR2004"]
"src/oxylabs_mcp/url_params.py" = ["E501"]

[tool.ruff.lint.pycodestyle]
max-line-length = 100
Expand All @@ -100,6 +107,7 @@ lines-after-imports = 2

[tool.pytest.ini_options]
asyncio_default_fixture_loop_scope = "session"
asyncio_mode = "auto"

[tool.black]
line-length = 100
124 changes: 53 additions & 71 deletions src/oxylabs_mcp/server.py
Original file line number Diff line number Diff line change
@@ -1,83 +1,53 @@
from typing import Any

from mcp.server.fastmcp import Context, FastMCP
from mcp.types import ToolAnnotations

from oxylabs_mcp import url_params
from oxylabs_mcp.config import settings
from oxylabs_mcp.exceptions import MCPServerError
from oxylabs_mcp.utils import (
convert_html_to_md,
get_content,
oxylabs_client,
strip_html,
)
from oxylabs_mcp.utils import get_content, oxylabs_client


mcp = FastMCP("oxylabs_mcp", dependencies=["mcp", "httpx"])
mcp = FastMCP("oxylabs_mcp")


@mcp.tool(
name="oxylabs_universal_scraper",
description="Scrape url using Oxylabs Web API with universal scraper",
)
async def scrape_universal_url(
@mcp.tool(annotations=ToolAnnotations(readOnlyHint=True))
async def universal_scraper(
ctx: Context, # type: ignore[type-arg]
url: url_params.URL_PARAM,
parse: url_params.PARSE_PARAM = False, # noqa: FBT002
render: url_params.RENDER_PARAM = "",
user_agent_type: url_params.USER_AGENT_TYPE_PARAM = "",
geo_location: url_params.GEO_LOCATION_PARAM = "",
output_format: url_params.OUTPUT_FORMAT_PARAM = "",
) -> str:
"""Scrape url using Oxylabs Web API with universal scraper."""
"""Get a content of any webpage.

Supports browser rendering, parsing of certain webpages
and different output formats.
"""
try:
async with oxylabs_client(ctx, with_auth=True) as client:
async with oxylabs_client(ctx) as client:
payload: dict[str, Any] = {"url": url}
if parse:
payload["parse"] = parse

if render:
payload["render"] = render
if user_agent_type:
payload["user_agent_type"] = user_agent_type
if geo_location:
payload["geo_location"] = geo_location

response = await client.post(settings.OXYLABS_SCRAPER_URL, json=payload)

response.raise_for_status()

return get_content(response, parse)
except MCPServerError as e:
return e.stringify()


@mcp.tool(
name="oxylabs_web_unblocker",
description="Scrape url using Oxylabs Web Unblocker",
)
async def scrape_with_web_unblocker(
ctx: Context, # type: ignore[type-arg]
url: url_params.URL_PARAM,
render: url_params.RENDER_PARAM = "",
) -> str:
"""Scrape url using Oxylabs Web Unblocker.

This tool manages the unblocking process to extract public data
even from the most difficult websites.
"""
headers: dict[str, Any] = {}
if render:
headers["X-Oxylabs-Render"] = render

try:
async with oxylabs_client(ctx, with_proxy=True, verify=False, headers=headers) as client:
response = await client.get(url)

response.raise_for_status()

return convert_html_to_md(strip_html(response.text))
return get_content(response, output_format=output_format)
except MCPServerError as e:
return e.stringify()


@mcp.tool(
name="oxylabs_google_search_scraper",
description="Scrape Google Search results using Oxylabs Web API",
)
async def scrape_google_search(
@mcp.tool(annotations=ToolAnnotations(readOnlyHint=True))
async def google_search_scraper(
ctx: Context, # type: ignore[type-arg]
query: url_params.GOOGLE_QUERY_PARAM,
parse: url_params.PARSE_PARAM = True, # noqa: FBT002
Expand All @@ -90,10 +60,15 @@ async def scrape_google_search(
geo_location: url_params.GEO_LOCATION_PARAM = "",
locale: url_params.LOCALE_PARAM = "",
ad_mode: url_params.AD_MODE_PARAM = False, # noqa: FBT002
output_format: url_params.OUTPUT_FORMAT_PARAM = "",
) -> str:
"""Scrape Google Search results using Oxylabs Web API."""
"""Scrape Google Search results.

Supports content parsing, different user agent types, pagination,
domain, geolocation, locale parameters and different output formats.
"""
try:
async with oxylabs_client(ctx, with_auth=True) as client:
async with oxylabs_client(ctx) as client:
payload: dict[str, Any] = {"query": query}

if ad_mode:
Expand Down Expand Up @@ -124,16 +99,13 @@ async def scrape_google_search(

response.raise_for_status()

return get_content(response, parse)
return get_content(response, parse=parse, output_format=output_format)
except MCPServerError as e:
return e.stringify()


@mcp.tool(
name="oxylabs_amazon_search_scraper",
description="Scrape Amazon Search results using Oxylabs Web API",
)
async def scrape_amazon_search(
@mcp.tool(annotations=ToolAnnotations(readOnlyHint=True))
async def amazon_search_scraper(
ctx: Context, # type: ignore[type-arg]
query: url_params.AMAZON_SEARCH_QUERY_PARAM,
category_id: url_params.CATEGORY_ID_CONTEXT_PARAM = "",
Expand All @@ -147,10 +119,16 @@ async def scrape_amazon_search(
domain: url_params.DOMAIN_PARAM = "",
geo_location: url_params.GEO_LOCATION_PARAM = "",
locale: url_params.LOCALE_PARAM = "",
output_format: url_params.OUTPUT_FORMAT_PARAM = "",
) -> str:
"""Scrape Amazon Search results using Oxylabs Web API."""
"""Scrape Amazon search results.

Supports content parsing, different user agent types, pagination,
domain, geolocation, locale parameters and different output formats.
Supports Amazon specific parameters such as category id, merchant id, currency.
"""
try:
async with oxylabs_client(ctx, with_auth=True) as client:
async with oxylabs_client(ctx) as client:
payload: dict[str, Any] = {"source": "amazon_search", "query": query}

context = []
Expand Down Expand Up @@ -184,16 +162,13 @@ async def scrape_amazon_search(

response.raise_for_status()

return get_content(response, parse)
return get_content(response, parse=parse, output_format=output_format)
except MCPServerError as e:
return e.stringify()


@mcp.tool(
name="oxylabs_amazon_product_scraper",
description="Scrape Amazon Products using Oxylabs Web API",
)
async def scrape_amazon_products(
@mcp.tool(annotations=ToolAnnotations(readOnlyHint=True))
async def amazon_product_scraper(
ctx: Context, # type: ignore[type-arg]
query: url_params.AMAZON_SEARCH_QUERY_PARAM,
autoselect_variant: url_params.AUTOSELECT_VARIANT_CONTEXT_PARAM = False, # noqa: FBT002
Expand All @@ -204,10 +179,17 @@ async def scrape_amazon_products(
domain: url_params.DOMAIN_PARAM = "",
geo_location: url_params.GEO_LOCATION_PARAM = "",
locale: url_params.LOCALE_PARAM = "",
output_format: url_params.OUTPUT_FORMAT_PARAM = "",
) -> str:
"""Scrape Amazon Products using Oxylabs Web API."""
"""Scrape Amazon products.

Supports content parsing, different user agent types, domain,
geolocation, locale parameters and different output formats.
Supports Amazon specific parameters such as currency and getting
more accurate pricing data with auto select variant.
"""
try:
async with oxylabs_client(ctx, with_auth=True) as client:
async with oxylabs_client(ctx) as client:
payload: dict[str, Any] = {"source": "amazon_product", "query": query}

context = []
Expand Down Expand Up @@ -235,7 +217,7 @@ async def scrape_amazon_products(

response.raise_for_status()

return get_content(response, parse)
return get_content(response, parse=parse, output_format=output_format)
except MCPServerError as e:
return e.stringify()

Expand Down
Loading