
Commit 61078ca: v0.3.5 (#89)

2 parents: 165dc52 + 82b0962

24 files changed, +259 / -283 lines

.github/workflows/tests.yml

Lines changed: 1 addition & 1 deletion
@@ -58,7 +58,7 @@ jobs:
       - name: Install all browsers dependencies
         run: |
           python3 -m pip install --upgrade pip
-          python3 -m pip install playwright==1.52.0 rebrowser-playwright==1.52.0 camoufox
+          python3 -m pip install "playwright>=1.55.0" "patchright>=1.55.0" camoufox  # quoted so bash does not parse ">" as a redirect
 
       - name: Retrieve Playwright browsers from cache if any
         id: playwright-cache

README.md

Lines changed: 2 additions & 3 deletions
@@ -322,10 +322,9 @@ This project includes code adapted from:
 ## Thanks and References
 
 - [Daijro](https://github.com/daijro)'s brilliant work on [BrowserForge](https://github.com/daijro/browserforge) and [Camoufox](https://github.com/daijro/camoufox)
-- [Vinyzu](https://github.com/Vinyzu)'s work on [Botright](https://github.com/Vinyzu/Botright)
+- [Vinyzu](https://github.com/Vinyzu)'s brilliant work on [Botright](https://github.com/Vinyzu/Botright) and [PatchRight](https://github.com/Kaliiiiiiiiii-Vinyzu/patchright)
 - [brotector](https://github.com/kaliiiiiiiiii/brotector) for browser detection bypass techniques
-- [fakebrowser](https://github.com/kkoooqq/fakebrowser) for fingerprinting research
-- [rebrowser-patches](https://github.com/rebrowser/rebrowser-patches) for stealth improvements
+- [fakebrowser](https://github.com/kkoooqq/fakebrowser) and [BotBrowser](https://github.com/botswin/BotBrowser) for fingerprinting research
 
 ---
 <div align="center"><small>Designed & crafted with ❤️ by Karim Shoair.</small></div><br>

docs/fetching/dynamic.md

Lines changed: 1 addition & 1 deletion
@@ -35,7 +35,7 @@ It's the same as the vanilla Playwright option, but it provides a simple stealth
 
 Some of the things this fetcher's stealth mode does include:
 
-* Patching the CDP runtime fingerprint.
+* Patching the CDP runtime fingerprint by using PatchRight.
 * Mimics some of the real browsers' properties by injecting several JS files and using custom options.
 * Custom flags are used on launch to hide Playwright even more and make it faster.
 * Generates real browser headers of the same type and user OS, then appends them to the request's headers.

pyproject.toml

Lines changed: 4 additions & 4 deletions
@@ -64,16 +64,16 @@ dependencies = [
 
 [project.optional-dependencies]
 fetchers = [
-    "click>=8.2.1",
+    "click>=8.3.0",
     "curl_cffi>=0.13.0",
-    "playwright>=1.52.0",
-    "rebrowser-playwright>=1.52.0",
+    "playwright>=1.55.0",
+    "patchright>=1.55.2",
     "camoufox>=0.4.11",
     "geoip2>=5.1.0",
     "msgspec>=0.19.0",
 ]
 ai = [
-    "mcp>=1.14.0",
+    "mcp>=1.14.1",
     "markdownify>=1.2.0",
     "scrapling[fetchers]",
 ]
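
Note on the dependency swap: patchright is published as a drop-in replacement for playwright, mirroring its package layout, so downstream code should only need to change the import line. A hedged sketch of what that migration looks like (the target URL is illustrative, and this snippet is not taken from the repo):

```python
# Hypothetical migration example: patchright mirrors the Playwright API,
# so only the import changes. Chromium is the engine patchright patches.
from patchright.sync_api import sync_playwright  # previously: from playwright.sync_api import sync_playwright

with sync_playwright() as pw:
    browser = pw.chromium.launch(headless=True)
    page = browser.new_page()
    page.goto("https://example.com")  # illustrative URL
    print(page.title())
    browser.close()
```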

scrapling/__init__.py

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 __author__ = "Karim Shoair ([email protected])"
-__version__ = "0.3.4"
+__version__ = "0.3.5"
 __copyright__ = "Copyright (c) 2024 Karim Shoair"
 

scrapling/cli.py

Lines changed: 4 additions & 4 deletions
@@ -32,8 +32,8 @@ def __ParseJSONData(json_string: Optional[str] = None) -> Optional[Dict[str, Any
     try:
         return json_loads(json_string)
-    except JSONDecodeError as e:  # pragma: no cover
-        raise ValueError(f"Invalid JSON data '{json_string}': {e}")
+    except JSONDecodeError as err:  # pragma: no cover
+        raise ValueError(f"Invalid JSON data '{json_string}': {err}")
 
 
 def __Request_and_Save(
@@ -65,8 +65,8 @@ def __ParseExtractArguments(
     for key, value in _CookieParser(cookies):
         try:
             parsed_cookies[key] = value
-        except Exception as e:
-            raise ValueError(f"Could not parse cookies '{cookies}': {e}")
+        except Exception as err:
+            raise ValueError(f"Could not parse cookies '{cookies}': {err}")
 
     parsed_json = __ParseJSONData(json)
     parsed_params = {}

scrapling/core/custom_types.py

Lines changed: 2 additions & 2 deletions
@@ -145,7 +145,7 @@ def re(
         clean_match: bool = False,
         case_sensitive: bool = True,
         check_match: Literal[False] = False,
-    ) -> "TextHandlers[TextHandler]": ...
+    ) -> "TextHandlers": ...
 
     def re(
         self,
@@ -241,7 +241,7 @@ def re(
         replace_entities: bool = True,
         clean_match: bool = False,
         case_sensitive: bool = True,
-    ) -> "TextHandlers[TextHandler]":
+    ) -> "TextHandlers":
         """Call the ``.re()`` method for each element in this list and return
         their results flattened as TextHandlers.
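
Note on this change: `TextHandlers` already fixes its element type, so the old `"TextHandlers[TextHandler]"` annotation re-subscripts a non-generic class, and checkers such as mypy reject it with an error along the lines of '"TextHandlers" expects no type arguments'. A minimal sketch, with simplified stand-in class bodies rather than the library's real definitions:

```python
from typing import List


class TextHandler(str):
    """Simplified stand-in for scrapling's string wrapper."""


class TextHandlers(List[TextHandler]):
    """Already parameterized with TextHandler, so the bare name is the full type."""

    def first(self) -> TextHandler:
        return self[0]


# -> "TextHandlers" is the correct annotation; -> "TextHandlers[TextHandler]"
# would subscript a concrete (non-generic) class, which type checkers flag.
```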

scrapling/core/shell.py

Lines changed: 4 additions & 4 deletions
@@ -201,7 +201,7 @@ def parse(self, curl_command: str) -> Optional[Request]:
             data_payload = parsed_args.data_binary  # Fallback to string
 
         elif parsed_args.data_raw is not None:
-            data_payload = parsed_args.data_raw
+            data_payload = parsed_args.data_raw.lstrip("$")
 
         elif parsed_args.data is not None:
             data_payload = parsed_args.data
@@ -317,8 +317,8 @@ def show_page_in_browser(page: Selector):  # pragma: no cover
     try:
         fd, fname = make_temp_file(prefix="scrapling_view_", suffix=".html")
-        with open(fd, "wb") as f:
-            f.write(page.body)
+        with open(fd, "w", encoding=page.encoding) as f:
+            f.write(page.html_content)
 
         open_in_browser(f"file://{fname}")
     except IOError as e:
@@ -545,7 +545,7 @@ def _extract_content(
     for page in pages:
         match extraction_type:
             case "markdown":
-                yield cls._convert_to_markdown(page.body)
+                yield cls._convert_to_markdown(page.html_content)
             case "html":
                 yield page.body
             case "text":
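
Note on the `lstrip("$")` change: curl commands copied from browser devtools often wrap payloads in bash ANSI-C quoting (`$'...'`). A real shell consumes the `$`, but a Python tokenizer keeps it, so the parser has to strip it. A small sketch of the failure mode, with a made-up payload:

```python
import shlex

# Chrome's "Copy as cURL (bash)" can emit: --data-raw $'{"key":"value"}'
args = shlex.split("""--data-raw $'{"key":"value"}'""")
payload = args[1]
print(payload)              # ${"key":"value"}  <- stray "$" a shell would have eaten
print(payload.lstrip("$"))  # {"key":"value"}   <- what the parser now forwards
```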

scrapling/engines/_browsers/_base.py

Lines changed: 2 additions & 28 deletions
@@ -1,4 +1,4 @@
-from time import time, sleep
+from time import time
 from asyncio import sleep as asyncio_sleep, Lock
 
 from camoufox import DefaultAddons
@@ -44,23 +44,7 @@ def _get_page(
     ) -> PageInfo:  # pragma: no cover
         """Get a new page to use"""
 
-        # Close all finished pages to ensure clean state
-        self.page_pool.close_all_finished_pages()
-
-        # If we're at max capacity after cleanup, wait for busy pages to finish
-        if self.page_pool.pages_count >= self.max_pages:
-            start_time = time()
-            while time() - start_time < self._max_wait_for_page:
-                # Wait for any pages to finish, then clean them up
-                sleep(0.05)
-                self.page_pool.close_all_finished_pages()
-                if self.page_pool.pages_count < self.max_pages:
-                    break
-            else:
-                raise TimeoutError(
-                    f"No pages finished to clear place in the pool within the {self._max_wait_for_page}s timeout period"
-                )
-
+        # No need to check if a page is available or not in sync code because the code blocked before reaching here till the page closed, ofc.
         page = self.context.new_page()
         page.set_default_navigation_timeout(timeout)
         page.set_default_timeout(timeout)
@@ -76,11 +60,6 @@ def _get_page(
 
         return self.page_pool.add_page(page)
 
-    @staticmethod
-    def _get_with_precedence(request_value: Any, session_value: Any, sentinel_value: object) -> Any:
-        """Get value with request-level priority over session-level"""
-        return request_value if request_value is not sentinel_value else session_value
-
     def get_pool_stats(self) -> Dict[str, int]:
         """Get statistics about the current page pool"""
         return {
@@ -105,16 +84,11 @@ async def _get_page(
     ) -> PageInfo:  # pragma: no cover
         """Get a new page to use"""
         async with self._lock:
-            # Close all finished pages to ensure clean state
-            await self.page_pool.aclose_all_finished_pages()
-
             # If we're at max capacity after cleanup, wait for busy pages to finish
             if self.page_pool.pages_count >= self.max_pages:
                 start_time = time()
                 while time() - start_time < self._max_wait_for_page:
-                    # Wait for any pages to finish, then clean them up
                     await asyncio_sleep(0.05)
-                    await self.page_pool.aclose_all_finished_pages()
                     if self.page_pool.pages_count < self.max_pages:
                         break
                 else:
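
Note on this change: the sync session can drop its capacity-wait loop because sync Playwright blocks the caller until the previous page closes, while the async session keeps the wait since concurrent coroutines can still race for pool slots. The surviving async logic, condensed into a standalone sketch (the pool attributes follow the diff, but this function is an illustration, not the module's code):

```python
from asyncio import sleep as asyncio_sleep
from time import time


async def _wait_for_page_slot(page_pool, max_pages: int, max_wait: float) -> None:
    """Block until the pool has room for one more page, or raise on timeout."""
    if page_pool.pages_count >= max_pages:
        start_time = time()
        while time() - start_time < max_wait:
            await asyncio_sleep(0.05)  # yield control so busy pages can finish and close
            if page_pool.pages_count < max_pages:
                return
        raise TimeoutError(f"No pages finished to clear place in the pool within the {max_wait}s timeout period")
```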

scrapling/engines/_browsers/_camoufox.py

Lines changed: 39 additions & 38 deletions
@@ -16,7 +16,7 @@
 )
 from playwright._impl._errors import Error as PlaywrightError
 
-from ._validators import validate, CamoufoxConfig
+from ._validators import validate_fetch as _validate
 from ._base import SyncSession, AsyncSession, StealthySessionMixin
 from scrapling.core.utils import log
 from scrapling.core._types import (
@@ -297,23 +297,22 @@ def fetch(
         :param selector_config: The arguments that will be passed in the end while creating the final Selector's class.
         :return: A `Response` object.
         """
-        # Validate all resolved parameters
-        params = validate(
-            dict(
-                google_search=self._get_with_precedence(google_search, self.google_search, _UNSET),
-                timeout=self._get_with_precedence(timeout, self.timeout, _UNSET),
-                wait=self._get_with_precedence(wait, self.wait, _UNSET),
-                page_action=self._get_with_precedence(page_action, self.page_action, _UNSET),
-                extra_headers=self._get_with_precedence(extra_headers, self.extra_headers, _UNSET),
-                disable_resources=self._get_with_precedence(disable_resources, self.disable_resources, _UNSET),
-                wait_selector=self._get_with_precedence(wait_selector, self.wait_selector, _UNSET),
-                wait_selector_state=self._get_with_precedence(wait_selector_state, self.wait_selector_state, _UNSET),
-                network_idle=self._get_with_precedence(network_idle, self.network_idle, _UNSET),
-                load_dom=self._get_with_precedence(load_dom, self.load_dom, _UNSET),
-                solve_cloudflare=self._get_with_precedence(solve_cloudflare, self.solve_cloudflare, _UNSET),
-                selector_config=self._get_with_precedence(selector_config, self.selector_config, _UNSET),
-            ),
-            CamoufoxConfig,
+        params = _validate(
+            [
+                ("google_search", google_search, self.google_search),
+                ("timeout", timeout, self.timeout),
+                ("wait", wait, self.wait),
+                ("page_action", page_action, self.page_action),
+                ("extra_headers", extra_headers, self.extra_headers),
+                ("disable_resources", disable_resources, self.disable_resources),
+                ("wait_selector", wait_selector, self.wait_selector),
+                ("wait_selector_state", wait_selector_state, self.wait_selector_state),
+                ("network_idle", network_idle, self.network_idle),
+                ("load_dom", load_dom, self.load_dom),
+                ("solve_cloudflare", solve_cloudflare, self.solve_cloudflare),
+                ("selector_config", selector_config, self.selector_config),
+            ],
+            _UNSET,
         )
 
         if self._closed:  # pragma: no cover
@@ -381,8 +380,9 @@ def handle_response(finished_response: SyncPlaywrightResponse):
                 page_info.page, first_response, final_response, params.selector_config
             )
 
-            # Mark the page as finished for next use
-            page_info.mark_finished()
+            # Close the page, to free up resources
+            page_info.page.close()
+            self.page_pool.pages.remove(page_info)
 
             return response
 
@@ -616,22 +616,22 @@ async def fetch(
         :param selector_config: The arguments that will be passed in the end while creating the final Selector's class.
         :return: A `Response` object.
         """
-        params = validate(
-            dict(
-                google_search=self._get_with_precedence(google_search, self.google_search, _UNSET),
-                timeout=self._get_with_precedence(timeout, self.timeout, _UNSET),
-                wait=self._get_with_precedence(wait, self.wait, _UNSET),
-                page_action=self._get_with_precedence(page_action, self.page_action, _UNSET),
-                extra_headers=self._get_with_precedence(extra_headers, self.extra_headers, _UNSET),
-                disable_resources=self._get_with_precedence(disable_resources, self.disable_resources, _UNSET),
-                wait_selector=self._get_with_precedence(wait_selector, self.wait_selector, _UNSET),
-                wait_selector_state=self._get_with_precedence(wait_selector_state, self.wait_selector_state, _UNSET),
-                network_idle=self._get_with_precedence(network_idle, self.network_idle, _UNSET),
-                load_dom=self._get_with_precedence(load_dom, self.load_dom, _UNSET),
-                solve_cloudflare=self._get_with_precedence(solve_cloudflare, self.solve_cloudflare, _UNSET),
-                selector_config=self._get_with_precedence(selector_config, self.selector_config, _UNSET),
-            ),
-            CamoufoxConfig,
+        params = _validate(
+            [
+                ("google_search", google_search, self.google_search),
+                ("timeout", timeout, self.timeout),
+                ("wait", wait, self.wait),
+                ("page_action", page_action, self.page_action),
+                ("extra_headers", extra_headers, self.extra_headers),
+                ("disable_resources", disable_resources, self.disable_resources),
+                ("wait_selector", wait_selector, self.wait_selector),
+                ("wait_selector_state", wait_selector_state, self.wait_selector_state),
+                ("network_idle", network_idle, self.network_idle),
+                ("load_dom", load_dom, self.load_dom),
+                ("solve_cloudflare", solve_cloudflare, self.solve_cloudflare),
+                ("selector_config", selector_config, self.selector_config),
+            ],
+            _UNSET,
         )
 
         if self._closed:  # pragma: no cover
@@ -701,8 +701,9 @@ async def handle_response(finished_response: AsyncPlaywrightResponse):
                 page_info.page, first_response, final_response, params.selector_config
             )
 
-            # Mark the page as finished for next use
-            page_info.mark_finished()
+            # Close the page, to free up resources
+            await page_info.page.close()
+            self.page_pool.pages.remove(page_info)
 
             return response
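
Note on the refactor: twelve `_get_with_precedence` calls collapse into one list of `(name, request_value, session_value)` triples plus the `_UNSET` sentinel. A guess at the helper's shape, reconstructed from the removed `_get_with_precedence` logic; the real `validate_fetch` presumably also performs the model validation that `CamoufoxConfig` used to handle:

```python
_UNSET = object()  # sentinel meaning "not supplied at request level"


def validate_fetch(triples, sentinel):
    """Sketch only: resolve request-level values over session-level defaults.

    Each triple is (name, request_value, session_value); a request value equal
    to the sentinel means the caller did not pass it, so the session default wins.
    """
    resolved = {}
    for name, request_value, session_value in triples:
        resolved[name] = request_value if request_value is not sentinel else session_value
    return resolved  # the real helper presumably validates types as well
```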
