Skip to content

Commit 65cc41c

Browse files
joshuayaoletonghanpre-commit-ci[bot]
authored
Add Browser-use Agent Example (#2312)
Signed-off-by: letonghan <[email protected]> Signed-off-by: Joshua Yao <[email protected]> Co-authored-by: letonghan <[email protected]> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent 197678c commit 65cc41c

File tree

15 files changed

+605
-1
lines changed

15 files changed

+605
-1
lines changed

.github/CODEOWNERS

Lines changed: 1 addition & 0 deletions

BrowserUseAgent/Dockerfile

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Base image coordinates; both can be overridden with --build-arg.
ARG IMAGE_REPO=opea
ARG BASE_TAG=latest
FROM $IMAGE_REPO/comps-base:$BASE_TAG

# The installation steps below run as root.
USER root

# Copy the dependency list and the service entry point into $HOME.
COPY ./requirements.txt $HOME/requirements.txt
COPY ./browser_use_agent.py $HOME/browser_use_agent.py

# Shorthand for the uv install command reused by the RUN step below.
ARG uvpip='uv pip install --system --no-cache-dir'
# Install Playwright with Chromium (and its system dependencies) first, then
# the application requirements, then a pinned posthog release.
RUN uv pip install --system --upgrade pip setuptools uv && \
    $uvpip pytest-playwright && \
    playwright install chromium --with-deps --no-shell && \
    $uvpip -r requirements.txt && \
    $uvpip posthog==5.4.0

# Run the service as `user` rather than root.
USER user

ENTRYPOINT ["python", "browser_use_agent.py"]

BrowserUseAgent/README.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
# Browser-use Agent Application
2+
3+
Browser-use agent empowers anyone to automate repetitive web tasks. It controls your web browser to perform tasks like visiting websites and extracting data. The application is powered by [browser-use](https://github.com/browser-use/browser-use) and OPEA LLM serving microservice.
4+
5+
## Deployment Options
6+
7+
The table below lists currently available deployment options. They outline in detail the implementation of this example on selected hardware.
8+
9+
| Category | Deployment Option | Description |
10+
| ---------------------- | ---------------------- | ----------------------------------------------------------------- |
11+
| On-premise Deployments | Docker Compose (Gaudi) | [Deployment on Gaudi](./docker_compose/intel/hpu/gaudi/README.md) |
12+
13+
## Validated Configurations
14+
15+
| **Deploy Method** | **LLM Engine** | **LLM Model** | **Hardware** |
16+
| ----------------- | -------------- | ---------------------------- | ------------ |
17+
| Docker Compose | vLLM | Qwen/Qwen2.5-VL-32B-Instruct | Intel Gaudi |
18+
| Docker Compose | vLLM | Qwen/Qwen2.5-VL-72B-Instruct | Intel Gaudi |
Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
# Copyright (C) 2024 Intel Corporation
2+
# SPDX-License-Identifier: Apache-2.0
3+
4+
5+
import os
6+
7+
from browser_use import Agent, BrowserProfile
8+
from comps import opea_microservices, register_microservice
9+
from comps.cores.telemetry.opea_telemetry import opea_telemetry
10+
from fastapi import Request
11+
from langchain_openai import ChatOpenAI
12+
from pydantic import BaseModel, SecretStr
13+
14+
# Process-wide singletons, populated lazily by initiate_llm_and_browser().
LLM = None
BROWSER_PROFILE = None
# Defaults for the LLM serving endpoint and model; overridable via environment.
LLM_ENDPOINT = os.getenv("LLM_ENDPOINT", "http://0.0.0.0:8008")
LLM_MODEL = os.getenv("LLM_MODEL", "Qwen/Qwen2.5-VL-32B-Instruct")
18+
19+
20+
# (endpoint, model, secret_key) the cached ``LLM`` client was built with.
_LLM_CONFIG = None


def initiate_llm_and_browser(llm_endpoint: str, model: str, secret_key: str = "sk-xxxxxx"):
    """Return the shared LLM client and browser profile, creating them on demand.

    The LLM client is cached in the module-level ``LLM`` global and reused while
    the requested endpoint, model and key stay the same. When any of them
    changes, the client is rebuilt so that per-request overrides are honoured
    instead of silently reusing the first client ever created.

    Args:
        llm_endpoint: Base URL of the OpenAI-compatible server; ``/v1`` is appended.
        model: Model name passed to the server.
        secret_key: API key sent to the server.

    Returns:
        A ``(llm, browser_profile)`` tuple.
    """
    global LLM, BROWSER_PROFILE, _LLM_CONFIG

    requested_config = (llm_endpoint, model, secret_key)
    # Only one client is kept at a time, so the cache cannot grow with the
    # number of distinct request configurations.
    if LLM is None or _LLM_CONFIG != requested_config:
        LLM = ChatOpenAI(
            base_url=f"{llm_endpoint}/v1",
            model=model,
            api_key=SecretStr(secret_key),
            temperature=0.1,
        )
        _LLM_CONFIG = requested_config

    # The browser profile does not depend on request parameters; build it once.
    if BROWSER_PROFILE is None:
        BROWSER_PROFILE = BrowserProfile(
            headless=True,
            chromium_sandbox=False,
        )
    return LLM, BROWSER_PROFILE
31+
32+
33+
class BrowserUseRequest(BaseModel):
    """Request payload accepted by the browser-use agent endpoint."""

    # Natural-language description of the task the agent should perform.
    task_prompt: str
    # Forwarded to the agent's ``use_vision`` option.
    use_vision: bool = True
    # API key passed to the LLM client.
    secret_key: str = "sk-xxxxxx"
    # LLM server base URL and model; default to the module-level settings.
    llm_endpoint: str = LLM_ENDPOINT
    llm_model: str = LLM_MODEL
    # Upper bound on agent steps, passed to ``agent.run(max_steps=...)``.
    agent_max_steps: int = 10
40+
41+
42+
class BrowserUseResponse(BaseModel):
    """Summary of one agent run returned by the browser-use agent endpoint."""

    # Whether the agent reported the task as successful.
    is_success: bool = False
    # Echo of the request's model, prompt and vision setting.
    model: str
    task_prompt: str
    use_vision: bool
    # URLs and action names taken from the agent's run history.
    agent_researched_urls: list[str] = []
    agent_actions: list[str] = []
    # Total run duration in seconds and the number of steps taken.
    agent_durations: float
    agent_steps: int
    # The agent's final answer, or a failure message.
    final_result: str
52+
53+
54+
@register_microservice(
    name="opea_service@browser_use_agent",
    endpoint="/v1/browser_use_agent",
    host="0.0.0.0",
    port=8022,
)
@opea_telemetry
async def run(request: Request):
    """Run one browser-automation task and summarise the outcome.

    The JSON body is validated as a ``BrowserUseRequest``. An agent is then
    built with the shared LLM client and browser profile and run for at most
    ``agent_max_steps`` steps.

    Args:
        request: Incoming HTTP request whose JSON body matches ``BrowserUseRequest``.

    Returns:
        A ``BrowserUseResponse`` describing success, visited URLs, actions,
        duration, step count and the final result (or a failure message).
    """
    data = await request.json()
    chat_request = BrowserUseRequest.model_validate(data)
    llm, browser_profile = initiate_llm_and_browser(
        llm_endpoint=chat_request.llm_endpoint, model=chat_request.llm_model, secret_key=chat_request.secret_key
    )
    agent = Agent(
        task=chat_request.task_prompt,
        llm=llm,
        use_vision=chat_request.use_vision,
        enable_memory=False,
        browser_profile=browser_profile,
    )
    history = await agent.run(max_steps=chat_request.agent_max_steps)

    # is_successful() can return None; evaluate it once and treat None as
    # failure so the flag and the final_result branch always agree.
    succeeded = bool(history.is_successful())
    if succeeded:
        # final_result is a required string; fall back to "" if the agent
        # produced no extracted content.
        final_result = history.final_result() or ""
    else:
        # Keep only real error messages; steps without an error may be
        # reported as empty/None placeholders.
        errors = [err for err in history.errors() if err]
        final_result = f"Task failed: {errors}"

    # NOTE(review): browser-use appears to type history URLs as optional;
    # drop missing entries so validation against list[str] cannot fail.
    visited_urls = [url for url in history.urls() if url is not None]

    return BrowserUseResponse(
        is_success=succeeded,
        model=chat_request.llm_model,
        task_prompt=chat_request.task_prompt,
        use_vision=chat_request.use_vision,
        agent_researched_urls=visited_urls,
        agent_actions=history.action_names(),
        agent_durations=round(history.total_duration_seconds(), 3),
        agent_steps=history.number_of_steps(),
        final_result=final_result,
    )
87+
88+
89+
# Start the registered microservice when this file is executed as a script.
if __name__ == "__main__":
    opea_microservices["opea_service@browser_use_agent"].start()
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# Example BrowserUseAgent deployments on an Intel® Gaudi® Platform
2+
3+
This example covers the single-node on-premises deployment of the BrowserUseAgent example using OPEA components. This example begins with a Quick Start section and then documents how to modify deployments, leverage new models and configure the number of allocated devices.
4+
5+
**Note** This example requires access to a properly installed Intel® Gaudi® platform with a functional Docker service configured to use the habanalabs-container-runtime. Please consult the [Intel® Gaudi® software Installation Guide](https://docs.habana.ai/en/v1.20.1/Installation_Guide/Driver_Installation.html) for more information.
6+
7+
## Quick Start Deployment
8+
9+
This section describes how to quickly deploy and test the BrowserUseAgent service manually on an Intel® Gaudi® platform. The basic steps are:
10+
11+
1. [Access the Code](#access-the-code)
12+
2. [Generate a HuggingFace Access Token](#generate-a-huggingface-access-token)
13+
3. [Configure the Deployment Environment](#configure-the-deployment-environment)
14+
4. [Deploy the Services Using Docker Compose](#deploy-the-services-using-docker-compose)
15+
5. [Check the Deployment Status](#check-the-deployment-status)
16+
6. [Test the Pipeline](#test-the-pipeline)
17+
7. [Cleanup the Deployment](#cleanup-the-deployment)
18+
19+
### Access the Code
20+
21+
Clone the GenAIExamples repository and access the BrowserUseAgent Intel® Gaudi® platform Docker Compose files and supporting scripts:
22+
23+
```bash
24+
git clone https://github.com/opea-project/GenAIExamples.git
25+
cd GenAIExamples/BrowserUseAgent/docker_compose/intel/hpu/gaudi/
26+
```
27+
28+
Checkout a released version, such as v1.5:
29+
30+
```bash
31+
git checkout v1.5
32+
```
33+
34+
### Generate a HuggingFace Access Token
35+
36+
Some HuggingFace resources, such as some models, are only accessible if you have an access token. If you do not already have a HuggingFace access token, you can create one by first creating an account by following the steps provided at [HuggingFace](https://huggingface.co/) and then generating a [user access token](https://huggingface.co/docs/transformers.js/en/guides/private#step-1-generating-a-user-access-token).
37+
38+
### Configure the Deployment Environment
39+
40+
To set up environment variables for deploying BrowserUseAgent services, source the _set_env.sh_ script in this directory:
41+
42+
```bash
43+
source ./set_env.sh
44+
```
45+
46+
The _set_env.sh_ script sets the required and optional environment variables used to configure the BrowserUseAgent services. If a variable is not already set in your shell, the script uses a default value for it. Check that the resulting values fit your deployment environment.
47+
48+
### Deploy the Services Using Docker Compose
49+
50+
To deploy the BrowserUseAgent services, execute the `docker compose up` command with the appropriate arguments. For a default deployment, execute:
51+
52+
```bash
53+
docker compose up -d
54+
```
55+
56+
The BrowserUseAgent docker images should automatically be downloaded from the `OPEA registry` and deployed on the Intel® Gaudi® Platform.
57+
58+
### Check the Deployment Status
59+
60+
After running docker compose, check if all the containers launched via docker compose have started:
61+
62+
```bash
63+
docker ps -a
64+
```
65+
66+
For the default deployment, the following 2 containers should have started:
67+
68+
```
69+
CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES
70+
96cb590c749c opea/browser-use-agent:latest "python browser_use_…" 9 seconds ago Up 8 seconds 0.0.0.0:8022->8022/tcp, :::8022->8022/tcp browser-use-agent-server
71+
8072e1c33a4b opea/vllm-gaudi:latest "python3 -m vllm.ent…" 9 seconds ago Up 8 seconds (health: starting) 0.0.0.0:8008->80/tcp, [::]:8008->80/tcp vllm-gaudi-server
72+
```
73+
74+
### Test the Pipeline
75+
76+
If you don't have existing websites to test, follow the [guide](./../../../../tests/webarena/README.md) to deploy one in your local environment.
77+
78+
Once the BrowserUseAgent services are running, test the pipeline using the following command:
79+
80+
```bash
81+
curl -X POST http://${host_ip}:${BROWSER_USE_AGENT_PORT}/v1/browser_use_agent \
82+
-H "Content-Type: application/json" \
83+
-d '{"task_prompt": "Navigate to http://10.7.4.57:8083/admin and login with the credentials: username: admin, password: admin1234. Then, find out What are the top-2 best-selling product in 2022?"}'
84+
```
85+
86+
- Note: Update the `task_prompt` to match the evaluation question relevant to your configured website.
87+
88+
### Cleanup the Deployment
89+
90+
To stop the containers associated with the deployment, execute the following command:
91+
92+
```bash
93+
docker compose -f compose.yaml down
94+
```
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Proxy settings shared by every service.
x-common-environment:
  &common-env
  no_proxy: ${no_proxy}
  http_proxy: ${http_proxy}
  https_proxy: ${https_proxy}

services:
  # vLLM model server running on Gaudi; listens on port 80 inside the container.
  vllm-gaudi-server:
    image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-1.22.0}
    container_name: vllm-gaudi-server
    ports:
      - ${LLM_ENDPOINT_PORT:-8008}:80
    volumes:
      - "${DATA_PATH:-./data}:/data"
    environment:
      <<: *common-env
      HF_TOKEN: ${HF_TOKEN}
      HF_HOME: /data
      HABANA_VISIBLE_DEVICES: all
      OMPI_MCA_btl_vader_single_copy_mechanism: none
      LLM_MODEL_ID: ${LLM_MODEL_ID}
      VLLM_TORCH_PROFILER_DIR: "/mnt"
      VLLM_SKIP_WARMUP: true
      PT_HPU_ENABLE_LAZY_COLLECTIVES: true
    runtime: habana
    cap_add:
      - SYS_NICE
    ipc: host
    healthcheck:
      test: ["CMD-SHELL", "curl -f http://localhost:80/health || exit 1"]
      interval: 10s
      timeout: 10s
      retries: 150
    command: --model $LLM_MODEL_ID --tensor-parallel-size $NUM_CARDS --host 0.0.0.0 --port 80 --max-seq-len-to-capture $MAX_TOTAL_TOKENS

  # Browser-use agent microservice; calls the vLLM server above.
  browser-use-agent-server:
    image: ${REGISTRY:-opea}/browser-use-agent:${TAG:-latest}
    container_name: browser-use-agent-server
    depends_on:
      - vllm-gaudi-server
    ports:
      - ${BROWSER_USE_AGENT_PORT:-8022}:8022
    environment:
      <<: *common-env
      # Fallback targets the vLLM service over the compose network: inside this
      # container 0.0.0.0:8008 would point at the agent itself, not at vLLM.
      LLM_ENDPOINT: ${LLM_ENDPOINT-http://vllm-gaudi-server:80}
      # Fallback matches the default model used by set_env.sh and the service.
      LLM_MODEL: ${LLM_MODEL_ID-Qwen/Qwen2.5-VL-32B-Instruct}
    ipc: host
Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
#!/usr/bin/env bash

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

# Exports the environment variables consumed by compose.yaml.
# This script is meant to be sourced, so it never calls `exit`.

# Source the shared environment file that sits five directories above this script.
SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd)

pushd "$SCRIPT_DIR/../../../../../" > /dev/null
source .set_env.sh
popd > /dev/null

# Return 0 if the named variable is set to a non-empty value, 1 if it is unset
# or empty, and 2 on incorrect usage.
check_var() {
    if [ "$#" -ne 1 ]; then
        echo "Error: Usage: check_var <ENV_VARIABLE_NAME>" >&2
        return 2
    fi

    local var_name="$1"
    if [ -n "${!var_name}" ]; then
        # Variable value is non-empty
        return 0
    else
        # Variable is unset or set to an empty string
        return 1
    fi
}

# Report a missing token instead of silently discarding the check result.
if ! check_var "HF_TOKEN"; then
    echo "Warning: HF_TOKEN is not set; gated HuggingFace models cannot be downloaded." >&2
fi
export ip_address=$(hostname -I | awk '{print $1}')
# The README test command uses host_ip and BROWSER_USE_AGENT_PORT; give both a
# value here while keeping any value that is already set.
export host_ip="${host_ip:-$ip_address}"
export BROWSER_USE_AGENT_PORT="${BROWSER_USE_AGENT_PORT:-8022}"

export LLM_ENDPOINT_PORT="${LLM_ENDPOINT_PORT:-8008}"
export LLM_ENDPOINT="http://${ip_address}:${LLM_ENDPOINT_PORT}"
export DATA_PATH="${DATA_PATH-"./data"}"
export LLM_MODEL_ID="${LLM_MODEL_ID-"Qwen/Qwen2.5-VL-32B-Instruct"}"
export MAX_TOTAL_TOKENS="${MAX_TOTAL_TOKENS-12288}"
export NUM_CARDS="${NUM_CARDS-4}"
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

services:
  # Agent image, built from the Dockerfile one directory above this file.
  browser-use-agent:
    build:
      args:
        IMAGE_REPO: ${REGISTRY}
        BASE_TAG: ${TAG}
        http_proxy: ${http_proxy}
        https_proxy: ${https_proxy}
        no_proxy: ${no_proxy}
      context: ../
      dockerfile: ./Dockerfile
    image: ${REGISTRY:-opea}/browser-use-agent:${TAG:-latest}

  # vLLM Gaudi image, built from the vllm-fork directory; extends the
  # browser-use-agent service and overrides its build context and dockerfile.
  vllm-gaudi:
    build:
      context: vllm-fork
      dockerfile: ./docker/Dockerfile.hpu
    extends: browser-use-agent
    image: ${REGISTRY:-opea}/vllm-gaudi:${TAG:-latest}

BrowserUseAgent/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
browser-use==0.3.2

BrowserUseAgent/tests/README.md

Whitespace-only changes.

0 commit comments

Comments
 (0)