# Skip to content

# Integration tests with 1 shards and 0 replicas #66

# Integration tests with 1 shards and 0 replicas

# Integration tests with 1 shards and 0 replicas #66

# Integration test workflow; the tests run on an AMD64 machine.
# The run title shows the effective shard/replica counts, falling back to
# 1 shard and 0 replicas when no inputs are supplied.
name: Integration Tests
run-name: "Integration tests with ${{ inputs.number_of_shards || 1 }} shards and ${{ inputs.number_of_replicas || 0 }} replicas"
# Triggers: calls from other workflows, manual dispatch, and pushes to
# mainline / release branches (pushes that only touch Markdown are skipped).
# Both input-bearing triggers declare the same two numeric inputs with the
# same descriptions. The push trigger declares no inputs, so expressions
# elsewhere in this workflow use `|| 1` / `|| 0` fallbacks.
on:
  workflow_call:
    inputs:
      number_of_shards:
        type: number
        description: 'Number of shards (content nodes per group in Vespa). Minimum of 1.'
        required: true
        default: 1
      number_of_replicas:
        type: number
        description: 'Number of replicas (groups in Vespa minus 1). Minimum of 0.'
        required: true
        default: 0
  workflow_dispatch:
    inputs:
      number_of_shards:
        type: number
        description: 'Number of shards (content nodes per group in Vespa). Minimum of 1.'
        required: true
        default: 1
      number_of_replicas:
        type: number
        # Kept identical to the workflow_call description so both entry points
        # document the same contract, including the minimum value.
        description: 'Number of replicas (groups in Vespa minus 1). Minimum of 0.'
        required: true
        default: 0
  push:
    branches:
      - mainline
      - 'releases/*'
    paths-ignore:
      - '**.md'
# One concurrency group per git ref and shard/replica combination; a newly
# queued run in the same group cancels the one already in progress.
concurrency:
  group: "integration-tests-${{ github.ref }}-${{ inputs.number_of_shards }}-${{ inputs.number_of_replicas }}"
  cancel-in-progress: true
# Token permissions granted to the jobs in this workflow.
permissions:
  # Read-only access to repository contents (enough for checkout).
  contents: read
  # Allows jobs to request an OIDC token.
  # NOTE(review): presumably used by the EC2 runner start/stop workflows - confirm.
  id-token: write
jobs:
# Works out whether this run needs a multi-node Vespa setup and, if it does,
# which extra pytest arguments the test job should receive.
Determine-Vespa-Setup:
  runs-on: ubuntu-latest
  outputs:
    VESPA_MULTINODE_SETUP: ${{ steps.set_var.outputs.VESPA_MULTINODE_SETUP }}
    MULTINODE_TEST_ARGS: ${{ steps.set_var.outputs.MULTINODE_TEST_ARGS }}
  steps:
    - name: Determine VESPA_MULTINODE_SETUP
      id: set_var
      run: |
        # Default to single node; the value is written again below for multi-node
        echo "VESPA_MULTINODE_SETUP=false" >> $GITHUB_OUTPUT
        echo "First assuming single node Vespa setup."

        # Read the inputs, using 1 shard / 0 replicas when none were supplied
        NUMBER_OF_SHARDS="${{ inputs.number_of_shards || 1 }}"
        NUMBER_OF_REPLICAS="${{ inputs.number_of_replicas || 0 }}"

        # Coerce both values to integers with awk
        NUMBER_OF_SHARDS_INT=$(echo "$NUMBER_OF_SHARDS" | awk '{print int($0)}')
        NUMBER_OF_REPLICAS_INT=$(echo "$NUMBER_OF_REPLICAS" | awk '{print int($0)}')

        # More than one shard, or any replica at all, means multi-node
        if [[ "$NUMBER_OF_SHARDS_INT" -gt 1 || "$NUMBER_OF_REPLICAS_INT" -gt 0 ]]; then
          echo "Now using multi-node Vespa setup. Shards are $NUMBER_OF_SHARDS_INT and replicas are $NUMBER_OF_REPLICAS_INT."
          echo "VESPA_MULTINODE_SETUP=true" >> $GITHUB_OUTPUT
          # Multi-node runs skip unrelated test directories to save time and prevent errors
          echo "MULTINODE_TEST_ARGS=--multinode --ignore=tests/integ_tests/core/index_management/test_index_management.py --ignore=tests/integ_tests/core/inference --ignore=tests/integ_tests/processing --ignore=tests/integ_tests/s2_inference" >> $GITHUB_OUTPUT
        fi
# Starts the first EC2 runner through the reusable start_ec2_runner workflow.
# The instance type is m6i.2xlarge for a multi-node Vespa setup and
# m6i.xlarge otherwise.
Start-Runner1:
  needs: Determine-Vespa-Setup
  uses: ./.github/workflows/start_ec2_runner.yml
  secrets: inherit
  with:
    base-ami-id: "${{ vars.MARQO_CPU_AMD64_TESTS_INSTANCE_AMI }}"
    ec2-instance-type: "${{ needs.Determine-Vespa-Setup.outputs.VESPA_MULTINODE_SETUP == 'true' && 'm6i.2xlarge' || 'm6i.xlarge' }}"
# Starts the second EC2 runner through the reusable start_ec2_runner workflow.
# The instance type is m6i.2xlarge for a multi-node Vespa setup and
# m6i.xlarge otherwise.
Start-Runner2:
  needs: Determine-Vespa-Setup
  uses: ./.github/workflows/start_ec2_runner.yml
  secrets: inherit
  with:
    base-ami-id: "${{ vars.MARQO_CPU_AMD64_TESTS_INSTANCE_AMI }}"
    ec2-instance-type: "${{ needs.Determine-Vespa-Setup.outputs.VESPA_MULTINODE_SETUP == 'true' && 'm6i.2xlarge' || 'm6i.xlarge' }}"
# Collects the labels and instance ids reported by both runner-start jobs into
# JSON arrays, so later matrix jobs can pick an entry by zero-based index.
Create-Matrix:
  needs:
    - Start-Runner1
    - Start-Runner2
  # always(): still build the arrays when one of the runners failed to start
  if: ${{ always() }}
  runs-on: ubuntu-latest
  outputs:
    labels: ${{ steps.create-matrix.outputs.labels }}
    ec2-instance-ids: ${{ steps.create-matrix.outputs.ec2-instance-ids }}
  steps:
    - name: Create matrix
      id: create-matrix
      run: |
        # Index 0 belongs to Start-Runner1, index 1 to Start-Runner2
        LABELS='["${{ needs.Start-Runner1.outputs.label }}", "${{ needs.Start-Runner2.outputs.label }}"]'
        INSTANCE_IDS='["${{ needs.Start-Runner1.outputs.ec2-instance-id }}", "${{ needs.Start-Runner2.outputs.ec2-instance-id }}"]'
        echo "labels=$LABELS" >> $GITHUB_OUTPUT
        echo "ec2-instance-ids=$INSTANCE_IDS" >> $GITHUB_OUTPUT
# Runs the integration test suite as a two-entry matrix. Each matrix entry
# runs on the runner whose label sits at the same index in Create-Matrix's
# `labels` output, and uploads its own coverage data file as an artifact.
Test-Marqo:
  name: Run Integration Tests (Part ${{ matrix.part }}/1)
  needs:
    - Create-Matrix  # Ensures it starts only after the runners are ready
    - Determine-Vespa-Setup
  strategy:
    fail-fast: false
    matrix:
      part: [0, 1]  # Zero-based index for JSON list access
  runs-on: ${{ fromJSON(needs.Create-Matrix.outputs.labels)[matrix.part] }}  # Selects the correct runner
  timeout-minutes: 60
  env:
    VESPA_MULTINODE_SETUP: ${{ needs.Determine-Vespa-Setup.outputs.VESPA_MULTINODE_SETUP }}
    MULTINODE_TEST_ARGS: ${{ needs.Determine-Vespa-Setup.outputs.MULTINODE_TEST_ARGS }}
  steps:
    - name: Checkout marqo repo
      uses: actions/checkout@v3
      with:
        path: marqo

    - name: Set up Python 3.9
      uses: actions/setup-python@v5
      with:
        python-version: "3.9"
        cache: "pip"

    - name: Checkout marqo-base for requirements
      uses: actions/checkout@v3
      with:
        repository: marqo-ai/marqo-base
        path: marqo-base

    - name: Install dependencies
      run: |
        pip install -r marqo-base/requirements/amd64-gpu-requirements.txt
        # override base requirements with marqo requirements, if needed:
        pip install -r marqo/requirements.txt
        pip install -r marqo/requirements.dev.txt

    - name: Download nltk data
      run: |
        python -m nltk.downloader punkt_tab

    - name: Build Vespa
      run: |
        # Stop and remove unattended-upgrades before using apt below
        systemctl stop unattended-upgrades
        apt-get remove -y unattended-upgrades

        # Function to wait for the dpkg lock to be released
        function wait_for_dpkg_lock() {
          while sudo fuser /var/lib/dpkg/lock-frontend >/dev/null 2>&1; do
            echo "Waiting for the dpkg lock to be released..."
            sleep 5
          done
        }

        # Wait for the dpkg lock before updating and installing
        wait_for_dpkg_lock

        echo "Updating package list"
        apt-get update -y

        # Build Vespa components
        echo "Installing jdk 17"
        sudo apt-get install openjdk-17-jdk -y

        echo "Installing maven"
        sudo apt-get install maven -y

        echo "Building Vespa components"
        cd marqo/vespa
        mvn clean package

    - name: Start Vespa
      run: python marqo/scripts/vespa_local/vespa_local.py full-start --Shards ${{ inputs.number_of_shards || 1 }} --Replicas ${{ inputs.number_of_replicas || 0 }}

    - name: Run Integration Tests
      # Secrets are passed through the step environment instead of being
      # interpolated into the script text. Interpolating them unquoted lets a
      # value containing whitespace or shell metacharacters be split or
      # executed by the shell; environment variables reach the tests intact.
      env:
        PRIVATE_MODEL_TESTS_AWS_ACCESS_KEY_ID: ${{ secrets.PRIVATE_MODEL_TESTS_AWS_ACCESS_KEY_ID }}
        PRIVATE_MODEL_TESTS_AWS_SECRET_ACCESS_KEY: ${{ secrets.PRIVATE_MODEL_TESTS_AWS_SECRET_ACCESS_KEY }}
        PRIVATE_MODEL_TESTS_HF_TOKEN: ${{ secrets.PRIVATE_MODEL_TESTS_HF_TOKEN }}
      run: |
        # Define these for use by marqo
        export VESPA_CONFIG_URL=http://localhost:19071
        export VESPA_DOCUMENT_URL=http://localhost:8080
        export VESPA_QUERY_URL=http://localhost:8080
        export HF_HUB_ENABLE_HF_TRANSFER=1

        cd marqo
        export PYTHONPATH="./src:."

        # pipefail: the step must fail when pytest fails, even though its
        # output is piped through tee
        set -o pipefail
        COVERAGE_FILE=.coverage.${{ matrix.part }} pytest ${{ env.MULTINODE_TEST_ARGS }} \
          --ignore=tests/integ_tests/test_documentation.py --durations=100 \
          --cov=src --cov-branch --cov-context=test --cov-report term:skip-covered \
          tests/integ_tests/ --split-n 2 --split-part ${{ matrix.part }} --split-by=classes --suppress-skips | tee pytest_output.log

    - name: Upload Test Result
      uses: actions/upload-artifact@v4
      with:
        name: marqo-test-report-${{ matrix.part }}
        path: marqo/.coverage.${{ matrix.part }}
        # The coverage data file is a dotfile, so hidden files must be included
        include-hidden-files: true
# Gathers the per-part coverage data files uploaded by Test-Marqo, combines
# them, generates the reports, and uploads the HTML report as an artifact.
Merge-Coverage:
  name: Merge Coverage And Generate Reports
  needs:
    - Test-Marqo
  runs-on: ubuntu-latest
  steps:
    - name: Checkout marqo repo
      uses: actions/checkout@v3
      with:
        path: marqo

    - name: Download all coverage files
      uses: actions/download-artifact@v4

    - name: Move Coverage Files
      # Match only the `.coverage.<part>` data files. A bare `.*` glob also
      # expands to `.` and `..` on bash older than 5.2, which makes mv exit
      # non-zero and fails the step.
      run: mv marqo-test-report-*/.coverage.* marqo/

    - name: DEBUG coverages
      run: |
        cd marqo
        export PYTHONPATH="./src:."
        pip install coverage
        coverage debug data

    - name: Merge Coverage Results
      run: |
        cd marqo
        export PYTHONPATH="./src:."
        pip install coverage
        coverage combine

    - name: Generate Coverage Reports
      run: |
        cd marqo
        export PYTHONPATH="./src:."
        coverage report -i
        coverage xml -i
        coverage html -i

    - name: Check Test Coverage of New Code
      id: check_test_coverage
      # A failure of the diff coverage check does not fail the job
      continue-on-error: true
      run: |
        cd marqo
        if [[ "${GITHUB_EVENT_NAME}" == "pull_request" ]]; then
          export BASE_BRANCH="${{ github.event.pull_request.base.ref }}"
          echo "Running diff-cover against branch $BASE_BRANCH"
          git fetch origin $BASE_BRANCH:$BASE_BRANCH
          pip install diff-cover
          diff-cover coverage.xml --html-report diff_cov.html --markdown-report diff_cov.md \
            --compare-branch $BASE_BRANCH --fail-under=95
        else
          # Still write placeholder report files for non-PR events
          echo "Skipping diff-cover on Push events"
          echo "Skipped diff-cover on Push events" > diff_cov.md
          touch diff_cov.html
        fi

    - name: Upload Merged Coverage Report
      uses: actions/upload-artifact@v4
      with:
        name: merged-coverage
        path: marqo/htmlcov/
# Stops both EC2 runners through the reusable stop_ec2_runner workflow, one
# matrix entry per runner, addressed by zero-based index into the JSON arrays
# published by Create-Matrix.
Stop-Runner:
  needs:
    - Create-Matrix  # supplies the runner labels and instance ids
    - Test-Marqo  # wait until the test job has finished
  # always(): stop the runners even when the tests fail
  if: ${{ always() }}
  strategy:
    matrix:
      instance: [0, 1]
  uses: ./.github/workflows/stop_ec2_runner.yml
  secrets: inherit
  with:
    label: "${{ fromJSON(needs.Create-Matrix.outputs.labels)[matrix.instance] }}"
    ec2-instance-id: "${{ fromJSON(needs.Create-Matrix.outputs.ec2-instance-ids)[matrix.instance] }}"