Integration tests with 1 shards and 0 replicas #66

Workflow file for this run

.github/workflows/integration_tests_CI.yml at e0fa19c

	# Runs the integration tests on an AMD64 machine.
	name: Integration Tests
	run-name: "Integration tests with ${{ inputs.number_of_shards || 1 }} shards and ${{ inputs.number_of_replicas || 0 }} replicas"

	on:
	  # Entry point for other workflows that reuse this one.
	  workflow_call:
	    inputs:
	      number_of_shards:
	        type: number
	        description: 'Number of shards (content nodes per group in Vespa). Minimum of 1.'
	        required: true
	        default: 1

	      number_of_replicas:
	        type: number
	        description: 'Number of replicas (groups in Vespa minus 1). Minimum of 0.'
	        required: true
	        default: 0

	  # Manual runs; the inputs mirror the workflow_call inputs, including their descriptions.
	  workflow_dispatch:
	    inputs:
	      number_of_shards:
	        type: number
	        description: 'Number of shards (content nodes per group in Vespa). Minimum of 1.'
	        required: true
	        default: 1

	      number_of_replicas:
	        type: number
	        description: 'Number of replicas (groups in Vespa minus 1). Minimum of 0.'
	        required: true
	        default: 0

	  # Pushes to mainline and release branches, unless only Markdown files changed.
	  push:
	    branches:
	      - mainline
	      - 'releases/*'
	    paths-ignore:
	      - '**.md'

	# One active run per ref and shards/replicas input pair; a newer run in the
	# same group cancels the one still in progress.
	concurrency:
	  group: integration-tests-${{ github.ref }}-${{ inputs.number_of_shards }}-${{ inputs.number_of_replicas }}
	  cancel-in-progress: true

	# Token permissions requested for this workflow.
	permissions:
	  contents: read
	  id-token: write

	jobs:
	Determine-Vespa-Setup:
	  # Decides between single-node and multi-node Vespa from the shard/replica
	  # inputs and publishes the decision (plus extra pytest arguments) as outputs.
	  runs-on: ubuntu-latest
	  outputs:
	    VESPA_MULTINODE_SETUP: ${{ steps.set_var.outputs.VESPA_MULTINODE_SETUP }}
	    MULTINODE_TEST_ARGS: ${{ steps.set_var.outputs.MULTINODE_TEST_ARGS }}
	  steps:
	    - name: Determine VESPA_MULTINODE_SETUP
	      id: set_var
	      run: |
	        # Begin with the single-node value; it is written again below for multi-node.
	        echo "VESPA_MULTINODE_SETUP=false" >> "$GITHUB_OUTPUT"
	        echo "First assuming single node Vespa setup."

	        # Absent inputs fall back to 1 shard and 0 replicas.
	        shards_raw="${{ inputs.number_of_shards || 1 }}"
	        replicas_raw="${{ inputs.number_of_replicas || 0 }}"

	        # Coerce both values to integers.
	        shard_count=$(awk '{print int($0)}' <<< "$shards_raw")
	        replica_count=$(awk '{print int($0)}' <<< "$replicas_raw")

	        # More than one shard, or any replica, selects the multi-node setup.
	        if (( shard_count > 1 || replica_count > 0 )); then
	          echo "Now using multi-node Vespa setup. Shards are $shard_count and replicas are $replica_count."
	          echo "VESPA_MULTINODE_SETUP=true" >> "$GITHUB_OUTPUT"
	          # Multi-node runs skip unrelated tests to save time and prevent errors.
	          echo "MULTINODE_TEST_ARGS=--multinode --ignore=tests/integ_tests/core/index_management/test_index_management.py --ignore=tests/integ_tests/core/inference --ignore=tests/integ_tests/processing --ignore=tests/integ_tests/s2_inference" >> "$GITHUB_OUTPUT"
	        fi

	Start-Runner1:
	  # Calls the reusable start_ec2_runner.yml workflow for the first runner.
	  # Instance type is m6i.2xlarge when VESPA_MULTINODE_SETUP is 'true',
	  # otherwise m6i.xlarge.
	  needs: [Determine-Vespa-Setup]
	  uses: ./.github/workflows/start_ec2_runner.yml
	  secrets: inherit
	  with:
	    base-ami-id: ${{ vars.MARQO_CPU_AMD64_TESTS_INSTANCE_AMI }}
	    ec2-instance-type: ${{ needs.Determine-Vespa-Setup.outputs.VESPA_MULTINODE_SETUP == 'true' && 'm6i.2xlarge' || 'm6i.xlarge' }}

	Start-Runner2:
	  # Calls the reusable start_ec2_runner.yml workflow for the second runner.
	  # Instance type is m6i.2xlarge when VESPA_MULTINODE_SETUP is 'true',
	  # otherwise m6i.xlarge.
	  needs: [Determine-Vespa-Setup]
	  uses: ./.github/workflows/start_ec2_runner.yml
	  secrets: inherit
	  with:
	    base-ami-id: ${{ vars.MARQO_CPU_AMD64_TESTS_INSTANCE_AMI }}
	    ec2-instance-type: ${{ needs.Determine-Vespa-Setup.outputs.VESPA_MULTINODE_SETUP == 'true' && 'm6i.2xlarge' || 'm6i.xlarge' }}

	Create-Matrix:
	  # Collects the labels and EC2 instance ids reported by both Start-Runner
	  # jobs into two JSON arrays that later jobs index by matrix position.
	  needs: [Start-Runner1, Start-Runner2]
	  # always(): the matrix is created even if one of the runners fails to start.
	  if: ${{ always() }}
	  runs-on: ubuntu-latest
	  outputs:
	    labels: ${{ steps.create-matrix.outputs.labels }}
	    ec2-instance-ids: ${{ steps.create-matrix.outputs.ec2-instance-ids }}
	  steps:
	    - name: Create matrix
	      id: create-matrix
	      run: |
	        runner_labels='["${{ needs.Start-Runner1.outputs.label }}", "${{ needs.Start-Runner2.outputs.label }}"]'
	        runner_instance_ids='["${{ needs.Start-Runner1.outputs.ec2-instance-id }}", "${{ needs.Start-Runner2.outputs.ec2-instance-id }}"]'

	        # Publish both arrays as step outputs.
	        {
	          echo "labels=$runner_labels"
	          echo "ec2-instance-ids=$runner_instance_ids"
	        } >> "$GITHUB_OUTPUT"

	Test-Marqo:
	name: Run Integration Tests (Part ${{ matrix.part }}/1)
	needs:
	- Create-Matrix # Ensures it starts only after the runners are ready
	- Determine-Vespa-Setup
	strategy:
	fail-fast: false
	matrix:
	part: [ 0, 1 ] # Zero-based index for JSON list access
	runs-on: ${{ fromJSON(needs.Create-Matrix.outputs.labels)[matrix.part] }} # Selects the correct runner

	timeout-minutes: 60
	env:
	VESPA_MULTINODE_SETUP: ${{ needs.Determine-Vespa-Setup.outputs.VESPA_MULTINODE_SETUP }}
	MULTINODE_TEST_ARGS: ${{ needs.Determine-Vespa-Setup.outputs.MULTINODE_TEST_ARGS }}
	steps:
	- name: Checkout marqo repo
	uses: actions/checkout@v3
	with:
	path: marqo

	- name: Set up Python 3.9
	uses: actions/setup-python@v5
	with:
	python-version: "3.9"
	cache: "pip"

	- name: Checkout marqo-base for requirements
	uses: actions/checkout@v3
	with:
	repository: marqo-ai/marqo-base
	path: marqo-base

	- name: Install dependencies
	run: \|
	pip install -r marqo-base/requirements/amd64-gpu-requirements.txt
	# override base requirements with marqo requirements, if needed:
	pip install -r marqo/requirements.txt
	pip install -r marqo/requirements.dev.txt

	- name: Download nltk data
	run: \|
	python -m nltk.downloader punkt_tab

	- name: Build Vespa
	run: \|
	systemctl stop unattended-upgrades
	apt-get remove -y unattended-upgrades

	# Function to wait for the dpkg lock to be released
	function wait_for_dpkg_lock() {
	while sudo fuser /var/lib/dpkg/lock-frontend >/dev/null 2>&1; do
	echo "Waiting for the dpkg lock to be released..."
	sleep 5
	done
	}

	# Wait for the dpkg lock before updating and installing
	wait_for_dpkg_lock

	echo "Updating package list"
	apt-get update -y

	# Build Vespa components
	echo "Installing jdk 17"
	sudo apt-get install openjdk-17-jdk -y
	echo "Installing maven"
	sudo apt-get install maven -y
	echo "Building Vespa components"
	cd marqo/vespa
	mvn clean package

	- name: Start Vespa
	run: python marqo/scripts/vespa_local/vespa_local.py full-start --Shards ${{ inputs.number_of_shards \|\| 1 }} --Replicas ${{ inputs.number_of_replicas \|\| 0 }}

	- name: Run Integration Tests
	run: \|
	# Define these for use by marqo
	export VESPA_CONFIG_URL=http://localhost:19071
	export VESPA_DOCUMENT_URL=http://localhost:8080
	export VESPA_QUERY_URL=http://localhost:8080
	export HF_HUB_ENABLE_HF_TRANSFER=1

	export PRIVATE_MODEL_TESTS_AWS_ACCESS_KEY_ID=${{ secrets.PRIVATE_MODEL_TESTS_AWS_ACCESS_KEY_ID }}
	export PRIVATE_MODEL_TESTS_AWS_SECRET_ACCESS_KEY=${{ secrets.PRIVATE_MODEL_TESTS_AWS_SECRET_ACCESS_KEY }}
	export PRIVATE_MODEL_TESTS_HF_TOKEN=${{ secrets.PRIVATE_MODEL_TESTS_HF_TOKEN }}

	cd marqo

	export PYTHONPATH="./src:."
	set -o pipefail
	COVERAGE_FILE=.coverage.${{ matrix.part }} pytest ${{ env.MULTINODE_TEST_ARGS }} \
	--ignore=tests/integ_tests/test_documentation.py --durations=100 \
	--cov=src --cov-branch --cov-context=test --cov-report term:skip-covered \
	tests/integ_tests/ --split-n 2 --split-part ${{ matrix.part }} --split-by=classes --suppress-skips \| tee pytest_output.log

	- name: Upload Test Result
	uses: actions/upload-artifact@v4
	with:
	name: marqo-test-report-${{ matrix.part }}
	path: marqo/.coverage.${{ matrix.part }}
	include-hidden-files: true

	Merge-Coverage:
	  # Combines the per-part coverage data uploaded by Test-Marqo and produces
	  # terminal, XML and HTML coverage reports.
	  name: Merge Coverage And Generate Reports
	  needs:
	    - Test-Marqo
	  runs-on: ubuntu-latest
	  steps:
	    - name: Checkout marqo repo
	      uses: actions/checkout@v4
	      with:
	        path: marqo

	    - name: Download all coverage files
	      uses: actions/download-artifact@v4

	    - name: Move Coverage Files
	      # With no artifact name given, each artifact is downloaded into a
	      # directory named after it (marqo-test-report-<part>/), holding the
	      # .coverage.<part> file uploaded by Test-Marqo.
	      run: mv marqo-test-report-*/.coverage.* marqo/

	    - name: DEBUG coverages
	      run: |
	        cd marqo
	        export PYTHONPATH="./src:."
	        pip install coverage
	        coverage debug data

	    - name: Merge Coverage Results
	      run: |
	        cd marqo
	        export PYTHONPATH="./src:."
	        pip install coverage
	        coverage combine

	    - name: Generate Coverage Reports
	      run: |
	        cd marqo
	        export PYTHONPATH="./src:."
	        coverage report -i
	        coverage xml -i
	        coverage html -i

	    - name: Check Test Coverage of New Code
	      id: check_test_coverage
	      # A coverage shortfall on new code is reported but does not fail the job.
	      continue-on-error: true
	      env:
	        # Supplied through the environment so the branch name is never
	        # expanded into the script text; empty outside pull_request events.
	        BASE_BRANCH: ${{ github.event.pull_request.base.ref }}
	      run: |
	        cd marqo
	        if [[ "${GITHUB_EVENT_NAME}" == "pull_request" ]]; then
	          echo "Running diff-cover against branch $BASE_BRANCH"
	          git fetch origin "$BASE_BRANCH:$BASE_BRANCH"
	          pip install diff-cover
	          diff-cover coverage.xml --html-report diff_cov.html --markdown-report diff_cov.md \
	            --compare-branch "$BASE_BRANCH" --fail-under=95
	        else
	          echo "Skipping diff-cover on Push events"
	          echo "Skipped diff-cover on Push events" > diff_cov.md
	          touch diff_cov.html
	        fi

	    - name: Upload Merged Coverage Report
	      uses: actions/upload-artifact@v4
	      with:
	        name: merged-coverage
	        path: marqo/htmlcov/

	Stop-Runner:
	  # Calls the reusable stop_ec2_runner.yml workflow once per runner, using
	  # the label and instance id at the same index in Create-Matrix's outputs.
	  needs:
	    - Create-Matrix  # Ensures it starts only after the runners are ready
	    - Test-Marqo  # required to wait when the main job is done
	  if: ${{ always() }}  # Ensures the runner is stopped even if tests fail
	  strategy:
	    # The default (fail-fast: true) cancels the remaining matrix leg when one
	    # fails, which would skip stopping the other instance.
	    fail-fast: false
	    matrix:
	      instance: [0, 1]  # Zero-based index for JSON list access
	  uses: ./.github/workflows/stop_ec2_runner.yml
	  with:
	    label: ${{ fromJSON(needs.Create-Matrix.outputs.labels)[matrix.instance] }}
	    ec2-instance-id: ${{ fromJSON(needs.Create-Matrix.outputs.ec2-instance-ids)[matrix.instance] }}
	  secrets: inherit

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Integration tests with 1 shards and 0 replicas #66

Workflow file

Integration tests with 1 shards and 0 replicas #66

Uh oh!

Jobs

Run details

Workflow file for this run