60 commits
19ebcce
Start working on WS improvements
Sh3llcod3 Oct 4, 2025
144e6bb
Add in cooperative yielding to prevent one coroutine taking too much …
Sh3llcod3 Oct 4, 2025
cabb59d
Start to cleanup the code a bit
Sh3llcod3 Oct 4, 2025
f481532
Further changes
Sh3llcod3 Oct 4, 2025
31d916b
This is the fastest code yet.
Sh3llcod3 Oct 5, 2025
da43868
Reuse exisiting initialized object
Sh3llcod3 Oct 5, 2025
2efe3f8
Start cleaning up the code
Sh3llcod3 Oct 5, 2025
ad4751d
Continue cleaning up the code
Sh3llcod3 Oct 5, 2025
ac9455a
Further optimizations
Sh3llcod3 Oct 6, 2025
f123bdb
I think this is it on the optimization front
Sh3llcod3 Oct 6, 2025
25fd220
Move the curlopt to session
Sh3llcod3 Oct 7, 2025
fed0cbb
Improve robustness
Sh3llcod3 Oct 7, 2025
3fc339e
Finish the review
Sh3llcod3 Oct 7, 2025
f4bf9d6
Make sends fairer
Sh3llcod3 Oct 8, 2025
e23799f
Optimize further
Sh3llcod3 Oct 8, 2025
2741960
Make queue size larger
Sh3llcod3 Oct 9, 2025
dfcf71d
Remove closed check
Sh3llcod3 Oct 10, 2025
b20bcf4
Turns out the real optimization was the friends we made along the way
Sh3llcod3 Oct 10, 2025
2d0359b
Register the reader once
Sh3llcod3 Oct 10, 2025
01bc8d7
WHY is this faster, I don't get it man, what is life anymore
Sh3llcod3 Oct 10, 2025
5576cfc
Finish cleanup & review
Sh3llcod3 Oct 10, 2025
6f53e22
FUTURE FUTURE FUTURE
Sh3llcod3 Oct 10, 2025
ca5bf47
Merge branch 'lexiforest:main' into Improve-AsyncWebsocket
Sh3llcod3 Oct 11, 2025
9c08b7d
Cleanup code and attempt to fix tests
Sh3llcod3 Oct 11, 2025
b90b1bf
Fix race condition in close
Sh3llcod3 Oct 11, 2025
031ac79
Set it back
Sh3llcod3 Oct 11, 2025
b26b9a1
Every. Last. Optimization.
Sh3llcod3 Oct 11, 2025
141e691
Set nodelay by default
Sh3llcod3 Oct 11, 2025
f9121dc
Slight cleanups
Sh3llcod3 Oct 11, 2025
3bc812b
Slight cleanups
Sh3llcod3 Oct 12, 2025
a2db85a
Eliminate race conditions on close and terminate
Sh3llcod3 Oct 12, 2025
997c977
Cleanups and tweaks
Sh3llcod3 Oct 12, 2025
14fe143
Yield fix
Sh3llcod3 Oct 12, 2025
d325b0a
Merge branch 'lexiforest:main' into Improve-AsyncWebsocket
Sh3llcod3 Oct 13, 2025
395f1a7
Make sending based
Sh3llcod3 Oct 13, 2025
8ddbe78
Improve docstrings
Sh3llcod3 Oct 13, 2025
9017019
Increase RECV buffer
Sh3llcod3 Oct 13, 2025
8a471f5
Turns out setting buffer sizes was not a good idea after all
Sh3llcod3 Oct 13, 2025
989b198
Code is ready to review
Sh3llcod3 Oct 14, 2025
f8a769e
Make it even more robust
Sh3llcod3 Oct 14, 2025
1baa219
Finalize for review
Sh3llcod3 Oct 15, 2025
a855875
Remove the curlopt that I thought I removed
Sh3llcod3 Oct 15, 2025
7cf312c
Whoops - set curl max frame size properly
Sh3llcod3 Oct 15, 2025
7ef2d11
Write docs, client and server for first benchmark
Sh3llcod3 Oct 16, 2025
2a9f787
Lint
Sh3llcod3 Oct 17, 2025
bb270d1
Add fair scheduling changes and optimizations
Sh3llcod3 Oct 18, 2025
1c26dcb
Move yield mask to parameter
Sh3llcod3 Oct 18, 2025
0d3d39b
Optimize further
Sh3llcod3 Oct 18, 2025
ee0b802
We are reaching levels of optimization that shouldn't even be possible
Sh3llcod3 Oct 18, 2025
4c8b3df
Improve benchmark README
Sh3llcod3 Oct 19, 2025
445121f
Start adding in second benchmark
Sh3llcod3 Oct 19, 2025
d498d7f
Continue writing second benchmark
Sh3llcod3 Oct 20, 2025
0e2e03b
Complete initial version of second benchmark
Sh3llcod3 Oct 21, 2025
b2fe11d
Complete the benchmarks
Sh3llcod3 Oct 22, 2025
60146a2
Finalize the README
Sh3llcod3 Oct 22, 2025
e3db8d7
What CI/CD doing?
Sh3llcod3 Oct 23, 2025
728f048
We like the robustness
Sh3llcod3 Oct 23, 2025
6cbc79c
Improve docstrings
Sh3llcod3 Oct 23, 2025
007c1d4
Update docstrings
Sh3llcod3 Oct 23, 2025
e81f82b
Seperate out send and recv queue sizes for better control
Sh3llcod3 Nov 1, 2025
README.md (2 additions, 0 deletions)

@@ -252,8 +252,10 @@
from curl_cffi import AsyncSession
async with AsyncSession() as s:
ws = await s.ws_connect("wss://echo.websocket.org")
await asyncio.gather(*[ws.send_str("Hello, World!") for _ in range(10)])
await ws.flush()
async for message in ws:
print(message)
await ws.close()
```

## Ecosystem
benchmark/README.md (124 additions, 1 deletion)

@@ -22,5 +22,128 @@
Async clients
Target
------


All the clients run with their session/client objects enabled.

Async WebSocket
------

Two distinct benchmarks are provided to evaluate the performance of the `AsyncWebSocket` implementation under different conditions.

1. Simple Throughput Test ([`client`](ws_bench_1_client.py), [`server`](ws_bench_1_server.py))

This is a lightweight, in-memory benchmark designed to measure the raw throughput and overhead of the WebSocket client. The server sends a repeating chunk of random bytes from memory, and the client receives it. This test is useful for quick sanity checks and detecting performance regressions under ideal, CPU-cached conditions.
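
The server's send path can be pictured as a generator that reuses a single random chunk from memory. This is a hypothetical helper for illustration, not the actual `ws_bench_1_server.py` code:

```python
import os
from typing import Iterator


def make_chunks(total_bytes: int, chunk_size: int) -> Iterator[bytes]:
    """Yield one random chunk, generated once, until total_bytes is covered."""
    chunk = os.urandom(chunk_size)  # generated once, stays CPU-cached afterwards
    sent = 0
    while sent < total_bytes:
        n = min(chunk_size, total_bytes - sent)
        yield chunk[:n]  # only the final chunk may be shorter
        sent += n
```

Because the payload never changes, no disk reads or hashing compete for CPU time, which is what makes this benchmark measure client overhead rather than system throughput.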

2. Verified Streaming Test ([`benchmark`](ws_bench_2.py))

This is a rigorous, end-to-end test. It first generates a multi-gigabyte file of random data and its SHA256 hash. The benchmark then streams this file from disk over the WebSocket connection. The receiving end calculates the hash of the incoming stream and verifies it against the original, ensuring complete data integrity.

**Important**: This test requires free RAM equal to the size of the random data. It measures the performance of the entire system pipeline, including disk I/O, CPU hashing speed, and network transfer. On modern systems, the bottleneck is likely to be the CPU's hashing performance or the disk's read speed.
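
The integrity check boils down to incremental hashing, which keeps memory use constant regardless of file size. A minimal sketch (the helper name is illustrative):

```python
import hashlib
from typing import Iterable


def hash_stream(chunks: Iterable[bytes]) -> str:
    """Hash an iterable of byte chunks incrementally, in constant memory."""
    hasher = hashlib.sha256()
    for chunk in chunks:
        hasher.update(chunk)  # feed each chunk as it arrives off the wire
    return hasher.hexdigest()
```

The receiving end feeds each WebSocket message into the hasher as it arrives, then compares the final digest against the one recorded at generation time.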

### Prerequisites

- Python 3.10+
- Pip packages

```bash
pip install aiohttp curl_cffi
```

> `uvloop` is highly recommended for performance on Linux and macOS. The benchmarks will automatically fall back to the standard asyncio event loop if it is not installed or on Windows.
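
The fallback described above can be sketched as follows; the actual `get_loop` lives in `ws_bench_utils.py` and may differ in detail:

```python
import asyncio


def get_loop() -> asyncio.AbstractEventLoop:
    """Prefer uvloop's event loop when available, else use asyncio's default."""
    try:
        import uvloop  # not available on Windows; optional elsewhere

        asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
    except ImportError:
        pass  # fall back to the standard asyncio event loop
    return asyncio.new_event_loop()
```
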

### Setup

1. TLS certificate (optional)

These benchmarks are configured to use WSS (secure WebSockets) by default on Linux and macOS. To generate a self-signed certificate:

```bash
openssl req -x509 -newkey rsa:2048 -nodes -keyout localhost.key -out localhost.crt -days 365 -subj "/CN=localhost"
```

> **Note**: On any platform, if you skip certificate generation, the benchmarks fall back to the insecure `ws://` scheme.

2. Configuration

The benchmark parameters (total data size, chunk size) can be modified by editing the `TestConfig` class within the [`ws_bench_utils.py`](ws_bench_utils.py) file. By default, both benchmarks are configured for `10 GiB` of data transfer.
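
Assuming `TestConfig` is a plain dataclass, it might look like the sketch below; the field names mirror those referenced by the benchmark client, but the defaults and endpoint shown here are illustrative:

```python
from dataclasses import dataclass


@dataclass
class TestConfig:
    total_gb: int = 10                      # total data to transfer, in GiB
    chunk_size: int = 65535                 # bytes per WebSocket message
    send_queue: int = 64                    # send queue size (illustrative default)
    recv_queue: int = 64                    # recv queue size (illustrative default)
    srv_path: str = "wss://localhost:8765"  # illustrative server endpoint


config = TestConfig()  # shared module-level instance imported by the benchmarks
```
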

### Running the Benchmarks

It is recommended to run the server and client in separate terminal windows.

#### Benchmark 1: Simple Throughput Test

1. Start the Server:

```bash
python ws_bench_1_server.py
```

2. Run the Client:

```bash
python ws_bench_1_client.py
```

#### Benchmark 2: Verified Streaming Test

1. Generate Test File (Initial Setup):

This command will create a large (`10 GiB`) file named `testdata.bin` and its hash. Ensure you have sufficient disk space:

```bash
python ws_bench_2.py generate
```

2. Start the Server:

```bash
python ws_bench_2.py server
```

3. Run the Client (Choose one):

- To test download speed (server sends, client receives):

```bash
python ws_bench_2.py client --test download
```

- To test upload speed (client sends, server receives):

```bash
python ws_bench_2.py client --test upload
```

### Performance Considerations

Benchmark results can vary significantly based on system-level factors. The following should be kept in mind:

- **Loopback Interface**: These tests run on a local loopback interface (`127.0.0.1`), which does not represent real-world internet conditions (latency, packet loss, etc.).

- **CPU Affinity**: For maximum consistency, especially on multi-core or multi-CPU (NUMA) systems, you can pin the server and client processes to specific CPU cores. This avoids performance penalties from processes migrating between cores or crossing CPU socket boundaries.

**On Linux:**
Use `taskset` to specify a CPU core (e.g., core 0 for the server, core 1 for the client).

```bash
# Terminal 1
taskset -c 0 python ws_bench_1_server.py
# Terminal 2
taskset -c 1 python ws_bench_1_client.py
```

**On Windows:**
Use the `start /affinity` command (a CMD built-in; from PowerShell, prefix it with `cmd /c`). The affinity mask is a hexadecimal number (`1` for CPU 0, `2` for CPU 1, `4` for CPU 2, etc.).

```bat
:: CMD window 1
start /affinity 1 python ws_bench_1_server.py
:: CMD window 2
start /affinity 2 python ws_bench_1_client.py
```

- **Concurrent Tests**: In the first benchmark's client (`ws_bench_1_client.py`), a commented-out sender task can be uncommented to run the upload and download tests concurrently. Note that a concurrent run terminates as soon as the faster of the two directions (typically download) completes.

- **Queue Sizes**: Adjust the `send_queue` and `recv_queue` sizes within the [`TestConfig`](ws_bench_utils.py) class to observe the impact on performance and backpressure.
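
The effect of queue sizing can be illustrated with a plain bounded `asyncio.Queue`, assuming curl_cffi's send/recv queues behave similarly; this demo is not curl_cffi code:

```python
import asyncio


async def fill_bounded_queue(queue_size: int) -> int:
    """Fill a bounded queue without awaiting; count how many puts fit."""
    q: asyncio.Queue[int] = asyncio.Queue(maxsize=queue_size)
    puts = 0
    while True:
        try:
            q.put_nowait(puts)  # succeeds until the queue reaches maxsize
            puts += 1
        except asyncio.QueueFull:
            # A real sender would `await q.put(...)` here and suspend:
            # that suspension is the backpressure on a fast producer.
            return puts


print(asyncio.run(fill_bounded_queue(4)))  # → 4
```

A larger queue buffers more messages before the producer suspends (more memory, smoother bursts); a smaller queue applies backpressure sooner.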
benchmark/ws_bench_1_client.py (178 additions, 0 deletions)

@@ -0,0 +1,178 @@
#!/usr/bin/env python3
"""
Websocket client simple benchmark - TLS (WSS)
"""

import time
from asyncio import (
FIRST_COMPLETED,
AbstractEventLoop,
CancelledError,
Task,
sleep,
wait,
)

from typing_extensions import Never
from ws_bench_utils import binary_data_generator, config, get_loop, logger

from curl_cffi import AsyncSession, AsyncWebSocket, WebSocketClosed


def calculate_stats(start_time: float, total_len: int) -> tuple[float, float]:
    """Calculate the elapsed time and the average throughput.

    Args:
        start_time (`float`): The start time from the performance counter.
        total_len (`int`): The total number of bytes transferred.

    Returns:
        `tuple[float, float]`: The duration in seconds and the rate in Gbps.
    """
    end_time: float = time.perf_counter()
    duration: float = end_time - start_time
    rate_gbps: float = (total_len * 8) / duration / (1024**3)
    return duration, rate_gbps


async def health_check() -> Never:
    """A simple coroutine that continuously prints a dot to prove that the event loop
    is alive and not starved from being able to run this task.

    Returns:
        Never: Keeps printing dots until the task is cancelled.
    """
    counter = 0
    logger.info("Starting sanity check. You should see a continuous stream of dots '.'")
    logger.info("If the dots stop for a long time, the event loop is blocked.")
    try:
        while True:
            await sleep(0.05)
            print(".", end="", flush=True)
            counter += 1
            if counter % 100 == 0:
                print("")
    finally:
        print("\r\x1b[K", end="")
        logger.info("Sanity check complete.")


async def ws_counter(ws: AsyncWebSocket) -> None:
    """Simple coroutine which counts how many bytes were received.

    Args:
        ws (`AsyncWebSocket`): Instantiated Curl CFFI AsyncWebSocket object.
    """
    recvd_len: int = 0
    start_time: float = time.perf_counter()
    logger.info("Receiving data from server")
    try:
        async for msg in ws:
            recvd_len += len(msg)

    except WebSocketClosed as exc:
        logger.debug(exc)

    finally:
        duration, avg_rate = calculate_stats(start_time, recvd_len)
        print("\r\x1b[K", end="")
        logger.info(
            "Received: %.2f GB in %.2f seconds", recvd_len / (1024**3), duration
        )
        logger.info("Average throughput (recv): %.2f Gbps", avg_rate)


async def ws_sender(ws: AsyncWebSocket) -> None:
    """Simple coroutine which just sends the same chunk of bytes until exhausted.

    Args:
        ws (`AsyncWebSocket`): Instantiated Curl CFFI AsyncWebSocket object.
    """
    sent_len: int = 0
    start_time: float = time.perf_counter()
    logger.info("Sending data to server")
    try:
        async for data_chunk in binary_data_generator(
            total_gb=config.total_gb, chunk_size=min(65535, config.chunk_size)
        ):
            _ = await ws.send(payload=data_chunk)
            sent_len += len(data_chunk)

    except WebSocketClosed as exc:
        logger.debug(exc)

    finally:
        duration, avg_rate = calculate_stats(start_time, sent_len)
        print("\r\x1b[K", end="")
        logger.info("Sent: %.2f GB in %.2f seconds", sent_len / (1024**3), duration)
        logger.info("Average throughput (send): %.2f Gbps", avg_rate)


async def run_benchmark(loop: AbstractEventLoop) -> None:
    """
    Simple client benchmark which sends/receives binary messages using curl-cffi.
    """
    logger.info("Starting curl-cffi benchmark")
    ws: AsyncWebSocket | None = None
    waiters: set[Task[None]] = set()
    try:
        async with AsyncSession(impersonate="chrome", verify=False) as session:
            ws = await session.ws_connect(
                config.srv_path,
                recv_queue_size=config.recv_queue,
                send_queue_size=config.send_queue,
            )
            logger.info("Connection established to %s", config.srv_path)

            # NOTE: Uncomment for send/recv benchmark or both
            waiters.add(loop.create_task(ws_counter(ws)))
            # waiters.add(loop.create_task(ws_sender(ws)))

            _, _ = await wait(waiters, return_when=FIRST_COMPLETED)

    except Exception:
        logger.exception("curl-cffi benchmark failed")
        raise

    finally:
        for wait_task in waiters:
            try:
                if not wait_task.done():
                    _ = wait_task.cancel()
                await wait_task

            except CancelledError:
                ...
        if ws:
            await ws.close(timeout=2)


async def main(loop: AbstractEventLoop) -> None:
    """Entrypoint"""
    waiters: set[Task[None]] = set()

    try:
        # Create the health check and benchmark tasks
        waiters.update(
            {loop.create_task(health_check()), loop.create_task(run_benchmark(loop))}
        )
        _, _ = await wait(waiters, return_when=FIRST_COMPLETED)

    except (KeyboardInterrupt, CancelledError):
        logger.debug("Cancelling benchmark")

    finally:
        for wait_task in waiters:
            try:
                if not wait_task.done():
                    _ = wait_task.cancel()
                await wait_task
            except CancelledError:
                ...


if __name__ == "__main__":
    evt_loop: AbstractEventLoop = get_loop()
    try:
        evt_loop.run_until_complete(main(evt_loop))
    finally:
        evt_loop.close()