Commit 290f2ab

feat: Add ruff formatting checks to CI
1 parent 7146c89 commit 290f2ab

103 files changed: +5111, -4486 lines changed

(Large commits have some content hidden by default; only a subset of the 103 changed files is shown below.)

.github/workflows/ruff.yml

Lines changed: 29 additions & 0 deletions
@@ -0,0 +1,29 @@
+name: Ruff Format Check
+
+on:
+  pull_request:
+    branches: [ main ]
+  push:
+    branches: [ main ]
+
+jobs:
+  ruff:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.10'
+
+      - name: Install ruff
+        run: pip install ruff
+
+      - name: Check formatting with ruff
+        run: ruff format --check .
+
+      - name: Check linting with ruff
+        run: ruff check .

CONTRIBUTING.md

Lines changed: 22 additions & 2 deletions
@@ -3,8 +3,8 @@
 Clone the repository:
 
 ```bash
-git clone https://github.com/OpenPipe/agent-reinforcement-training.git
-cd agent-reinforcement-training
+git clone https://github.com/OpenPipe/ART.git
+cd ART
 ```
 
 Install the dependencies:
@@ -13,6 +13,26 @@ Install the dependencies:
 uv sync
 ```
 
+### Code Formatting and Linting
+
+This project uses [ruff](https://github.com/astral-sh/ruff) for both code formatting and linting. Before submitting a pull request, please ensure your code passes both checks:
+
+```bash
+# Check code formatting
+uv run ruff format --check .
+
+# Run linting checks
+uv run ruff check .
+
+# To automatically fix formatting issues
+uv run ruff format .
+
+# To automatically fix some linting issues
+uv run ruff check --fix .
+```
+
+These checks are automatically run in CI for all pull requests. You can also install ruff as a pre-commit hook if desired.
+
 Then follow the SkyPilot or Local Training instructions below.
 
 > **Warning:** There is currently a bug with tool use functionality. The issue appears to be that vLLM does not return all the token log probabilities for tool use. Further investigation is needed to determine the exact cause. For now, teaching use case-specific tool use with non-tool use models is the recommended workaround.
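
For contributors who want the pre-commit hook mentioned in the new CONTRIBUTING section, a minimal `.pre-commit-config.yaml` sketch is shown below. It is not part of this commit; it assumes the astral-sh/ruff-pre-commit hooks, and the pinned `rev` is a placeholder to be matched to whatever ruff version the project uses.

```yaml
# .pre-commit-config.yaml (hypothetical example, not included in this commit)
repos:
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.4.4  # placeholder; pin to the ruff release you actually use
    hooks:
      - id: ruff          # lint, mirrors `ruff check .` in CI
        args: [--fix]
      - id: ruff-format   # format, mirrors `ruff format --check .` in CI
```

Installing the hook with `pip install pre-commit` followed by `pre-commit install` would then run the same checks locally before each commit.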

dev/new_models/benchmark_inference.py

Lines changed: 33 additions & 10 deletions
@@ -5,7 +5,7 @@
 and requests approximately 1000 output tokens (max_tokens=1000), repeating
 for 10 iterations. It measures per-request latencies and summarizes statistics.
 """
-import os
+
 import time
 import asyncio
 import statistics
@@ -14,6 +14,8 @@
 from art.local import LocalBackend
 
 load_dotenv()
+
+
 async def timed_request(client, model_name, prompt, max_tokens, temperature):
     """Execute a single model request and measure elapsed time and token usage."""
     start = time.perf_counter()
@@ -31,11 +33,19 @@ async def timed_request(client, model_name, prompt, max_tokens, temperature):
     usage = response.usage
     prompt_tokens = getattr(usage, "prompt_tokens", None)
     completion_tokens = getattr(usage, "completion_tokens", None)
-    return {"response": response, "elapsed": elapsed, "prompt_tokens": prompt_tokens, "completion_tokens": completion_tokens}
+    return {
+        "response": response,
+        "elapsed": elapsed,
+        "prompt_tokens": prompt_tokens,
+        "completion_tokens": completion_tokens,
+    }
+
 
 async def main():
     # Define prompt (approx 1000 input tokens) and model
-    prompt = ("Hello world. " * 500).strip() + "Please repeat the entire prompt back to me verbatim"
+    prompt = (
+        "Hello world. " * 500
+    ).strip() + "Please repeat the entire prompt back to me verbatim"
     # Output tokens to request
     max_tokens = 1000
     temperature = 1.0
@@ -59,7 +69,9 @@ async def main():
     per_request_completion_tokens = []
 
     for i in range(1, iterations + 1):
-        print(f"Iteration {i}/{iterations}: sending {concurrency} concurrent requests...")
+        print(
+            f"Iteration {i}/{iterations}: sending {concurrency} concurrent requests..."
+        )
         iteration_start = time.perf_counter()
         # launch concurrent requests and time each individually
         tasks = [
@@ -92,11 +104,21 @@ async def main():
     pr_min = min(per_request_durations) if per_request_durations else 0.0
     pr_max = max(per_request_durations) if per_request_durations else 0.0
     pr_avg = statistics.mean(per_request_durations) if per_request_durations else 0.0
-    pr_std = statistics.stdev(per_request_durations) if len(per_request_durations) > 1 else 0.0
-    avg_prompt_tokens = (statistics.mean(per_request_prompt_tokens)
-                         if per_request_prompt_tokens else None)
-    avg_completion_tokens = (statistics.mean(per_request_completion_tokens)
-                             if per_request_completion_tokens else None)
+    pr_std = (
+        statistics.stdev(per_request_durations)
+        if len(per_request_durations) > 1
+        else 0.0
+    )
+    avg_prompt_tokens = (
+        statistics.mean(per_request_prompt_tokens)
+        if per_request_prompt_tokens
+        else None
+    )
+    avg_completion_tokens = (
+        statistics.mean(per_request_completion_tokens)
+        if per_request_completion_tokens
+        else None
+    )
 
     # Report results
     print("\nInference benchmark results:")
@@ -118,5 +140,6 @@ async def main():
     if avg_completion_tokens is not None:
         print(f"  Avg completion tokens: {avg_completion_tokens:.2f}")
 
+
 if __name__ == "__main__":
-    asyncio.run(main())
+    asyncio.run(main())

dev/new_models/gemma3.py

Lines changed: 8 additions & 3 deletions
@@ -3,10 +3,10 @@
 import art
 from art.local import LocalBackend
 from dotenv import load_dotenv
-import openai
 
 load_dotenv()
 
+
 async def rollout(model: art.TrainableModel, prompt: str) -> art.Trajectory:
     messages: art.Messages = [
         {
@@ -16,7 +16,10 @@ async def rollout(model: art.TrainableModel, prompt: str) -> art.Trajectory:
     ]
     client = model.openai_client()
     chat_completion = await client.chat.completions.create(
-        messages=messages, model=model.name, max_tokens=100, timeout=100,
+        messages=messages,
+        model=model.name,
+        max_tokens=100,
+        timeout=100,
     )
     choice = chat_completion.choices[0]
     content = choice.message.content
@@ -31,6 +34,7 @@ async def rollout(model: art.TrainableModel, prompt: str) -> art.Trajectory:
         reward = 0.0
     return art.Trajectory(messages_and_choices=[*messages, choice], reward=reward)
 
+
 async def main():
     with open("dev/new_models/prompts.json", "r") as f:
         prompts = json.load(f)
@@ -61,5 +65,6 @@ async def main():
         config=art.TrainConfig(learning_rate=1e-4),
     )
 
+
 if __name__ == "__main__":
-    asyncio.run(main())
+    asyncio.run(main())

dev/new_models/qwen3_try.ipynb

Lines changed: 8 additions & 13 deletions
@@ -40,12 +40,10 @@
     }
    ],
    "source": [
-    "import asyncio\n",
     "import json\n",
     "import art\n",
     "from art.local import LocalBackend\n",
     "from dotenv import load_dotenv\n",
-    "import openai\n",
     "\n",
     "load_dotenv()"
    ]
@@ -65,7 +63,11 @@
     "    ]\n",
     "    client = model.openai_client()\n",
     "    chat_completion = await client.chat.completions.create(\n",
-    "        messages=messages, model=model.name, max_tokens=100, timeout=100, extra_body={\"chat_template_kwargs\": {\"enable_thinking\": False}},\n",
+    "        messages=messages,\n",
+    "        model=model.name,\n",
+    "        max_tokens=100,\n",
+    "        timeout=100,\n",
+    "        extra_body={\"chat_template_kwargs\": {\"enable_thinking\": False}},\n",
     "    )\n",
     "    choice = chat_completion.choices[0]\n",
     "    content = choice.message.content\n",
@@ -399,26 +401,19 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "for _ in range(await model.get_step(), 1_000):\n",
+    "for _ in range(await qwen3.get_step(), 1_000):\n",
     "    train_groups = await art.gather_trajectory_groups(\n",
     "        (\n",
-    "            art.TrajectoryGroup(rollout(model, prompt) for _ in range(32))\n",
+    "            art.TrajectoryGroup(rollout(qwen3, prompt) for _ in range(32))\n",
     "            for prompt in prompts\n",
     "        ),\n",
     "        pbar_desc=\"gather\",\n",
     "    )\n",
-    "    await model.train(\n",
+    "    await qwen3.train(\n",
     "        train_groups,\n",
     "        config=art.TrainConfig(learning_rate=1e-4),\n",
     "    )"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
   }
  ],
  "metadata": {

dev/new_models/qwen3_try.py

Lines changed: 9 additions & 3 deletions
@@ -3,10 +3,10 @@
 import art
 from art.local import LocalBackend
 from dotenv import load_dotenv
-import openai
 
 load_dotenv()
 
+
 async def rollout(model: art.TrainableModel, prompt: str) -> art.Trajectory:
     messages: art.Messages = [
         {
@@ -16,7 +16,11 @@ async def rollout(model: art.TrainableModel, prompt: str) -> art.Trajectory:
     ]
     client = model.openai_client()
     chat_completion = await client.chat.completions.create(
-        messages=messages, model=model.name, max_tokens=100, timeout=100, extra_body={"chat_template_kwargs": {"enable_thinking": False}},
+        messages=messages,
+        model=model.name,
+        max_tokens=100,
+        timeout=100,
+        extra_body={"chat_template_kwargs": {"enable_thinking": False}},
     )
     choice = chat_completion.choices[0]
     content = choice.message.content
@@ -31,6 +35,7 @@ async def rollout(model: art.TrainableModel, prompt: str) -> art.Trajectory:
         reward = 0.0
     return art.Trajectory(messages_and_choices=[*messages, choice], reward=reward)
 
+
 async def main():
     with open("dev/new_models/prompts.json", "r") as f:
         prompts = json.load(f)
@@ -57,5 +62,6 @@ async def main():
         config=art.TrainConfig(learning_rate=1e-4),
     )
 
+
 if __name__ == "__main__":
-    asyncio.run(main())
+    asyncio.run(main())
