Merged
21 commits
5b66ae8
Add Synthetic Data Generation in OSB (#829) (#864)
IanHoang Jun 4, 2025
cf94fd8
Make OSB Cloud Agnostic: Refactor OSB Client Logic (#845)
IanHoang Jun 30, 2025
480af80
add checker for 1.x terms (#888)
OVI3D0 Jul 10, 2025
3239b67
only run lint checker if 2.x-terms-only label is present
OVI3D0 Jul 21, 2025
c5fc33a
Data Producer with multi-part downloads and partitioning (#890)
gkamat Jul 23, 2025
0f42f00
Enhance & Refactor SDG (#895)
IanHoang Jul 29, 2025
b56183f
Rebase renamed-components onto latest developments (#882)
IanHoang Jun 30, 2025
e93f341
Change run-test to run (#901)
aman825 Jul 28, 2025
f2be1e6
Address remaining legacy names
IanHoang Aug 1, 2025
763427a
Change aggregated_results to aggregated-results (#932)
OVI3D0 Aug 13, 2025
706ed63
Add Pydantic to SDG for stronger validation, error handling, and exte…
IanHoang Aug 13, 2025
b29806f
Visualize subcommand (#934)
OVI3D0 Aug 18, 2025
dff0595
Lint checker for 1.x/2.x terms (#933)
OVI3D0 Aug 18, 2025
e58675d
Initial version of the Ingestion Manager for streaming ingestion.
gkamat Aug 7, 2025
ac7936e
Initial set of Data Reader changes for Streaming Ingestion (#935)
gkamat Aug 18, 2025
557edcd
Bump version to 2.0.0
IanHoang Aug 18, 2025
3eda23d
Add fix for session for 2.0-rc
IanHoang Aug 18, 2025
ef0ddaf
fixes for aggregate command (#939)
OVI3D0 Aug 19, 2025
096a178
Address Integration Tests
IanHoang Aug 19, 2025
123f993
Add missing __init__.py file.
gkamat Aug 19, 2025
2daee05
Fix integ test failure in 2.0 RC (#942)
gkamat Aug 20, 2025
113 changes: 113 additions & 0 deletions .ci/scripts/check_deprecated_terms.py
@@ -0,0 +1,113 @@
#!/usr/bin/env python3
# .ci/scripts/check_deprecated_terms.py

import os, re, sys, argparse

# ---- Term sets ----
# Block these when you're on the *2.x* branch (i.e., forbid legacy 1.x names):
TERMS_1X = [
    "provision-configs",
    "provision-config-instances",
    "results-publishing",
    "results-publisher",
    "load-worker-coordinator-hosts",
    "execute-test",
]

# Block these when you're on the *1.x* branch (i.e., forbid 2.x names):
TERMS_2X = [
    "cluster-configs",
    "reporting",
    "worker-hosts",
    "run-test",
    "test-run",
]

SKIP_DIRS = {".git", "venv", "__pycache__", ".pytest_cache", ".ci", "tests"}
VALID_EXTENSIONS = (".py", ".yml", ".yaml", ".md", ".sh", ".json", ".txt")

SUPPRESS_MARKERS = {
    "block-1x": "check-deprecated-terms-disable-1x",
    "block-2x": "check-deprecated-terms-disable-2x",
}

def generate_variants(term: str) -> set[str]:
    base = term.replace("-", " ").replace("_", " ")
    words = base.split()
    variants = set()
    # kebab, snake, Pascal, camel
    variants.add("-".join(words))
    variants.add("_".join(words))
    variants.add("".join([w.capitalize() for w in words]))  # PascalCase
    variants.add(words[0] + "".join([w.capitalize() for w in words[1:]]))  # camelCase

    # Optional: flip order for 2-word terms, but avoid silly "-ip" flips creating noise
    if len(words) == 2 and not words[1].lower() == "ip":
        variants.add("-".join(words[::-1]))
        variants.add("_".join(words[::-1]))
        variants.add(words[1] + words[0].capitalize())  # camelCase reverse
    return variants

def build_patterns(terms: list[str]) -> list[re.Pattern]:
    pats = []
    for t in terms:
        for v in generate_variants(t):
            pats.append(re.compile(re.escape(v), re.IGNORECASE))
    return pats

def should_check_file(path: str) -> bool:
    return path.endswith(VALID_EXTENSIONS)

def walk_and_check(patterns: list[re.Pattern], mode: str) -> int:
    error_found = 0
    suppress_marker = SUPPRESS_MARKERS.get(mode)
    for root, _, files in os.walk("."):
        if any(skip in root.split(os.sep) for skip in SKIP_DIRS):
            continue
        for f in files:
            full_path = os.path.join(root, f)
            if not should_check_file(full_path):
                continue
            try:
                with open(full_path, "r", encoding="utf-8") as fh:
                    previous_line = ""
                    for i, line in enumerate(fh, 1):
                        if suppress_marker in previous_line or suppress_marker in line:
                            previous_line = line
                            continue
                        for patt in patterns:
                            if patt.search(line):
                                print(f"[Forbidden Term] {full_path}:{i}: {line.strip()}")
                                error_found = 1
                                break
            except Exception as e:
                print(f"[Warning] Skipped file {full_path}: {e}")
    return error_found

def main():
    p = argparse.ArgumentParser(description="Check forbidden term set by mode or env.")
    p.add_argument("--mode", choices=["block-1x", "block-2x"], default=os.getenv("OSB_TERM_MODE"))
    args = p.parse_args()

    mode = args.mode
    if not mode:
        print("No mode provided (use --mode block-1x | block-2x or set OSB_TERM_MODE). Exiting 0.")
        sys.exit(0)

    if mode == "block-1x":
        terms = TERMS_1X
        banner = "❌ 1.x terms found in 2.x branch. Replace with 2.x names."
    else:
        terms = TERMS_2X
        banner = "❌ 2.x terms found in 1.x branch. Replace with 1.x names."

    patterns = build_patterns(terms)
    failed = walk_and_check(patterns, mode)
    if failed:
        print("\n" + banner)
        sys.exit(1)
    print("✅ No forbidden terms found for", mode)
    sys.exit(0)

if __name__ == "__main__":
    main()
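The variant expansion at the heart of the checker can be sketched in isolation. This is a simplified standalone sketch, not the script itself: it covers only the kebab-, snake-, Pascal-, and camelCase forms and omits the word-order flip the script applies to two-word terms.

```python
# Simplified sketch of the checker's variant expansion: each banned term
# is matched in kebab-case, snake_case, PascalCase, and camelCase form.
def generate_variants(term: str) -> set[str]:
    words = term.replace("-", " ").replace("_", " ").split()
    return {
        "-".join(words),                                        # kebab-case
        "_".join(words),                                        # snake_case
        "".join(w.capitalize() for w in words),                 # PascalCase
        words[0] + "".join(w.capitalize() for w in words[1:]),  # camelCase
    }

print(sorted(generate_variants("execute-test")))
# -> ['ExecuteTest', 'execute-test', 'executeTest', 'execute_test']
```

Each variant is then wrapped with `re.escape` and compiled case-insensitively, so a hit on any spelling of a legacy name fails the check unless the line (or the line above it) carries the suppression marker.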
24 changes: 24 additions & 0 deletions .github/workflows/unit-test.yml
@@ -24,3 +24,27 @@ jobs:

- name: Run the CI build script
run: bash .ci/build.sh build_and_unit_test

      - name: Detect term-check mode from labels
        id: detect_mode
        shell: bash
        run: |
          labels='${{ toJson(github.event.pull_request.labels.*.name) }}'
          echo "PR labels: $labels"
          mode=""

          if echo "$labels" | grep -qi '"check-1.x-terms"'; then
            mode="block-1x"
          elif echo "$labels" | grep -qi '"check-2.x-terms"'; then
            mode="block-2x"
          fi

          echo "mode=$mode" >> "$GITHUB_OUTPUT"

      # 2) Run the checker only if a mode was selected
      - name: Run term lint checker
        if: steps.detect_mode.outputs.mode != ''
        env:
          OSB_TERM_MODE: ${{ steps.detect_mode.outputs.mode }}
        run: |
          python3 .ci/scripts/check_deprecated_terms.py --mode "$OSB_TERM_MODE"
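The label-detection step is just a substring match over the JSON-encoded label list. The same mapping can be sketched in Python with a hypothetical label set (the label names `check-1.x-terms` and `check-2.x-terms` are from the workflow; everything else here is illustrative):

```python
import json

def detect_mode(labels: list[str]) -> str:
    # Mirror of the workflow's grep-based mapping: the 1.x check wins
    # if both labels are present, and no label means the checker is skipped.
    encoded = json.dumps(labels).lower()
    if '"check-1.x-terms"' in encoded:
        return "block-1x"
    if '"check-2.x-terms"' in encoded:
        return "block-2x"
    return ""

print(detect_mode(["check-1.x-terms", "backport 2.x"]))  # -> block-1x
print(detect_mode(["documentation"]))                    # -> empty; checker step is skipped
```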
19 changes: 13 additions & 6 deletions .pylintrc
@@ -166,14 +166,21 @@ disable=print-statement,
too-many-instance-attributes,
too-many-statements,
inconsistent-return-statements,
C0302,
too-many-lines,
C4001,
R0916,
W0201,
W0613,
W0621,
too-many-boolean-expressions,
attribute-defined-outside-init,
unused-argument,
redefined-outer-name,
invalid-docstring-quote,
raise-missing-from
raise-missing-from,
consider-using-with,
duplicate-code,
consider-using-from-import,
bad-option-value,
consider-using-dict-items,
unused-private-member,
use-a-generator


# Enable the message, report, category or checker with the given id(s). You can
2 changes: 1 addition & 1 deletion CREATE_WORKLOAD_GUIDE.md
@@ -53,7 +53,7 @@ By default, workloads created will come with the following operations run in the

To invoke the newly created workload, run the following:
```
$ opensearch-benchmark execute_test \
$ opensearch-benchmark run \
--pipeline="benchmark-only" \
--workload-path="<PATH OUTPUTTED IN THE OUTPUT OF THE CREATE-WORKLOAD COMMAND>" \
--target-host="<CLUSTER ENDPOINT>" \
34 changes: 17 additions & 17 deletions DEVELOPER_GUIDE.md
@@ -8,7 +8,7 @@ This document will walk you through on what's needed to start contributing code
- [Setup](#setup)
- [Importing the project into an IDE](#importing-the-project-into-an-ide)
- [Setting Up a Local OpenSearch Cluster For OSB Development (Optional)](#setting-up-a-local-opensearch-cluster-for-osb-development-optional)
- [Executing tests](#executing-tests)
- [Running tests](#running-tests)
- [Unit tests](#unit-tests)
- [Integration tests](#integration-tests)
- [Submitting your changes for a pull request](#submitting-your-changes-for-a-pull-request)
@@ -28,7 +28,7 @@ This document will walk you through on what's needed to start contributing code

`pyenv` requires that the C compiler and development libraries be installed, so that the specified Python versions can be built from source. The installation instructions vary from platform to platform.

For Debian-based systems, install the following modules to continue with the next steps:
For Debian-based systems, install the following modules to continue with the next steps:
```
sudo apt-get install -y make build-essential libssl-dev zlib1g-dev libbz2-dev \
libreadline-dev libsqlite3-dev wget curl llvm libncurses5-dev libncursesw5-dev \
@@ -65,9 +65,9 @@ This document will walk you through on what's needed to start contributing code

### Setup

To develop OSB properly, it is recommended that you fork the official OpenSearch Benchmark repository.
To develop OSB properly, it is recommended that you fork the official OpenSearch Benchmark repository.

For those working on WSL2, it is recommended to clone the repository and set up the working environment within the Linux subsystem. Refer to the guide for setting up WSL2 on [Visual Studio Code](https://code.visualstudio.com/docs/remote/wsl) or [PyCharm](https://www.jetbrains.com/help/pycharm/using-wsl-as-a-remote-interpreter.html#create-wsl-interpreter).
For those working on WSL2, it is recommended to clone the repository and set up the working environment within the Linux subsystem. Refer to the guide for setting up WSL2 on [Visual Studio Code](https://code.visualstudio.com/docs/remote/wsl) or [PyCharm](https://www.jetbrains.com/help/pycharm/using-wsl-as-a-remote-interpreter.html#create-wsl-interpreter).

After you have cloned your fork of OpenSearch Benchmark, use the following command-line instructions to set up OpenSearch Benchmark for development:
```
@@ -98,9 +98,9 @@ In order to run tests within the PyCharm IDE, ensure the `Python Integrated Tool

## Setting Up a Local OpenSearch Cluster For OSB Development (Optional)

### OpenSearch Installation
### OpenSearch Installation

Download the latest release of OpenSearch from https://opensearch.org/downloads.html. If you are using WSL, make sure to download it into your `/home/<user>` directory instead of `/mnt/c`.
Download the latest release of OpenSearch from https://opensearch.org/downloads.html. If you are using WSL, make sure to download it into your `/home/<user>` directory instead of `/mnt/c`.
```
wget https://artifacts.opensearch.org/releases/bundle/opensearch/<x.x.x>/opensearch-<x.x.x>-linux-x64.tar.gz
tar -xf opensearch-x.x.x-linux-x64.tar.gz
@@ -110,17 +110,17 @@ NOTE: Have Docker running in the background for the next steps. Refer to the ins

### OpenSearch Cluster setup

Add the following settings to the `opensearch.yml` file under the config directory
Add the following settings to the `opensearch.yml` file under the config directory
```
vim config/opensearch.yml
```
```
#
discovery.type: single-node
plugins.security.disabled: true
discovery.type: single-node
plugins.security.disabled: true
#
```
Run the opensearch-tar-install.sh script to install and setup a cluster for our use.
Run the opensearch-tar-install.sh script to install and setup a cluster for our use.
```
bash opensearch-tar-install.sh
```
@@ -146,25 +146,25 @@ Check the output of `curl.exe "http://localhost:9200/_cluster/health?pretty"`. O
"active_shards_percent_as_number" : 100.0
}
```
Now, you have a local cluster running! You can connect to this and run the workload for the next step.
Now, you have a local cluster running! You can connect to this and run the workload for the next step.

### Running Workloads on a locally installed Cluster

Here's a sample executation of the geonames benchmark which can be found from the [workloads](https://github.com/opensearch-project/opensearch-benchmark-workloads) repo.
Here's a sample run of the geonames benchmark which can be found from the [workloads](https://github.com/opensearch-project/opensearch-benchmark-workloads) repo.
```
opensearch-benchmark execute-test --pipeline=benchmark-only --workload=geonames --target-host=127.0.0.1:9200 --test-mode --workload-params '{"number_of_shards":"1","number_of_replicas":"0"}'
opensearch-benchmark run --pipeline=benchmark-only --workload=geonames --target-host=127.0.0.1:9200 --test-mode --workload-params '{"number_of_shards":"1","number_of_replicas":"0"}'
```

And we're done! You should be seeing the performance metrics soon enough!

### Debugging
### Debugging

**If you are not seeing any results, it should be an indicator that there is an issue with your cluster setup or the way the manager is accessing it**. Use the command below to view the logs.
**If you are not seeing any results, it should be an indicator that there is an issue with your cluster setup or the way the manager is accessing it**. Use the command below to view the logs.
```
tail -f ~/.benchmark/logs/benchmark.log
```

## Executing tests
## Running tests

Once setup is complete, you may run the unit and integration tests.

@@ -230,7 +230,7 @@ make install
To streamline the process, please refer to [this guide](https://github.com/opensearch-project/opensearch-benchmark/blob/main/PYTHON_SUPPORT_GUIDE.md)

### Debugging OpenSearch Benchmark in Developer Mode
Many users find that the simplest way to debug OpenSearch Benchmark is by using developer mode. Users can activate developer mode by running `python3 -m pip install -e .` within the cloned OpenSearch Benchmark repository. Any changes made and saved will be reflected when OpenSearch Benchmark is run. Users can add loggers or print statements and see the changes reflected in subsequent runs.
Many users find that the simplest way to debug OpenSearch Benchmark is by using developer mode. Users can activate developer mode by running `python3 -m pip install -e .` within the cloned OpenSearch Benchmark repository. Any changes made and saved will be reflected when OpenSearch Benchmark is run. Users can add loggers or print statements and see the changes reflected in subsequent runs.

### Debugging Unittests in Visual Studio Code
To run and debug unittests in Visual Studio Code, add the following configuration to the Python Debugger `launch.json` file. See [the official Visual Studio Code documentation](https://code.visualstudio.com/docs/editor/debugging) for more information on setting up and accessing `launch.json` file.
14 changes: 7 additions & 7 deletions PYTHON_SUPPORT_GUIDE.md
@@ -27,17 +27,17 @@ supported_python_versions = [(3, 8), (3, 9), (3, 10), (3, 11), (3, 12)]

**Basic OpenSearch Benchmark command with distribution version and test mode**
```
opensearch-benchmark execute-test --distribution-version=1.0.0 --workload=geonames --test-mode
opensearch-benchmark run --distribution-version=1.0.0 --workload=geonames --test-mode
```

**OpenSearch Benchmark command executing test on target-host in test mode**
**OpenSearch Benchmark command running test on target-host in test mode**
```
opensearch-benchmark execute-test --workload=geonames --pipeline=benchmark-only --target-host="<OPENSEARCH CLUSTER ENDPOINT>" --client-options="basic_auth_user:'<USERNAME>',basic_auth_password:'<PASSWORD>'" --test-mode"
opensearch-benchmark run --workload=geonames --pipeline=benchmark-only --target-host="<OPENSEARCH CLUSTER ENDPOINT>" --client-options="basic_auth_user:'<USERNAME>',basic_auth_password:'<PASSWORD>'" --test-mode
```

**OpenSearch-Benchmark command executing test on target-host without test mode**
**OpenSearch-Benchmark command running test on target-host without test mode**
```
opensearch-benchmark execute-test --workload=geonames --pipeline=benchmark-only --target-host="<OPENSEARCH CLUSTER ENDPOINT>" --client-options="basic_auth_user:'<USERNAME>',basic_auth_password:'<PASSWORD>'"
opensearch-benchmark run --workload=geonames --pipeline=benchmark-only --target-host="<OPENSEARCH CLUSTER ENDPOINT>" --client-options="basic_auth_user:'<USERNAME>',basic_auth_password:'<PASSWORD>'"
```

To ensure that users are using the correct python versions, install the repository with `python3 -m pip install -e .` and run `which opensearch-benchmark` to get the path. Prepend this path to each of the three commands above and re-run them in the command line.
@@ -46,12 +46,12 @@ Keep in mind the file path outputted differs for each operating system and might

- For example: When running `which opensearch-benchmark` on an Ubuntu environment, the command line outputs `/home/ubuntu/.pyenv/shims/opensearch-benchmark`. On closer inspection, the path points to a shell script. Thus, to invoke OpenSearch Benchmark, prepend the OpenSearch Benchmark command with `bash` and the path outputted earlier:
```
bash -x /home/ubuntu/.pyenv/shims/opensearch-benchmark execute-test --workload=geonames --pipeline=benchmark-only --target-host="<OPENSEARCH CLUSTER ENDPOINT>" --client-options="basic_auth_user:'<USERNAME>',basic_auth_password:'<PASSWORD>'"
bash -x /home/ubuntu/.pyenv/shims/opensearch-benchmark run --workload=geonames --pipeline=benchmark-only --target-host="<OPENSEARCH CLUSTER ENDPOINT>" --client-options="basic_auth_user:'<USERNAME>',basic_auth_password:'<PASSWORD>'"
```

- Another example: When running `which opensearch-benchmark` on an Amazon Linux 2 environment, the command line outputs `~/.local/bin/opensearch-benchmark`. On closer inspection, the path points to a Python script. Thus, to invoke OpenSearch Benchmark, prepend the OpenSearch Benchmark command with `python3` and the path outputted earlier:
```
python3 ~/.local/bin/opensearch-benchmark execute-test --workload=geonames --pipeline=benchmark-only --target-host="<OPENSEARCH CLUSTER ENDPOINT>" --client-options="basic_auth_user:'<USERNAME>',basic_auth_password:'<PASSWORD>'"
python3 ~/.local/bin/opensearch-benchmark run --workload=geonames --pipeline=benchmark-only --target-host="<OPENSEARCH CLUSTER ENDPOINT>" --client-options="basic_auth_user:'<USERNAME>',basic_auth_password:'<PASSWORD>'"
```

### Creating a Pull Request After Adding Changes and Testing Them Out
2 changes: 1 addition & 1 deletion README.md
@@ -64,4 +64,4 @@ Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
License for the specific language governing permissions and limitations under
the License.
the License.
6 changes: 3 additions & 3 deletions RELEASE_GUIDE.md
@@ -43,7 +43,7 @@ Add backport labels to PRs and commits so that changes can be added to `main` br
* Since releases are generally published on Thursdays, maintainers should try to ensure all changes are merged in by Tuesday.
* A week prior to the scheduled release, maintainers should announce the fact in the [#performance channel](https://opensearch.slack.com/archives/C0516H8EJ7R) within the OpenSearch Slack community.
* Ensure that documentation is appropriately updated with respect to incoming changes prior to the release.

## Release the new version of OpenSearch Benchmark to PyPI, Docker, and ECR

1. Clone the official OpenSearch Benchmark git repository and change directory to it. This is where the following commands will be issued.
@@ -73,7 +73,7 @@ Add backport labels to PRs and commits so that changes can be added to `main` br
3. Run `opensearch-benchmark --version` to ensure that it is the correct version
4. Run `opensearch-benchmark --help`
5. Run `opensearch-benchmark list workloads`
6. Run a basic workload on Linux and MacOS: `opensearch-benchmark execute-test --workload pmc --test-mode`
6. Run a basic workload on Linux and MacOS: `opensearch-benchmark run --workload pmc --test-mode`
7. If you are fastidious, you can check the installed source files at `` `python3 -m site --user-site`/osbenchmark `` to verify that a recent change is indeed present.

8. Verify Docker Hub Staging OSB Image Works:
Expand Down Expand Up @@ -123,7 +123,7 @@ Send this message in the following channels in OpenSearch Community Slack:

If an error occurs during build process and you need to retrigger the workflow, do the following:

* Delete the tag locally: `git tag -d <VERSION>`
* Delete the tag locally: `git tag -d <VERSION>`
* Delete the tag on GitHub: `git push --delete origin <VERSION>`
* Delete the draft release on GitHub
* Create the tag again and push it to re-initiate the release process.
2 changes: 1 addition & 1 deletion benchmarks/workload/bulk_params_test.py
@@ -74,7 +74,7 @@ def __str__(self, *args, **kwargs):
def create_reader(bulk_size):
metadata = params.GenerateActionMetaData(index_name="test-idx", type_name=None)

source = params.Slice(StaticSource, 0, sys.maxsize)
source = params.Slice(StaticSource, 0, sys.maxsize, None, None)
reader = params.MetadataIndexDataReader(data_file="bogus",
batch_size=bulk_size,
bulk_size=bulk_size,