13 changes: 10 additions & 3 deletions .github/workflows/run_required_checks.yml
@@ -24,10 +24,17 @@ jobs:
- name: Check for Non-Markdown Changes
id: check_changes
run: |
if git diff --name-only ${{ github.event.before }} ${{ github.sha }} | grep -vE '\.md$'; then
echo "non_documentation_changes_found=true" >> $GITHUB_OUTPUT
BEFORE="${{ github.event.before }}"
AFTER="${{ github.sha }}"

if [ -z "$BEFORE" ] || \
git diff --name-only "$BEFORE" "$AFTER" | grep -qvE '\.md$'
then
# either BEFORE was empty, or we found at least one non‑.md change
echo "non_documentation_changes_found=true" >> "$GITHUB_OUTPUT"
else
echo "non_documentation_changes_found=false" >> $GITHUB_OUTPUT
# BEFORE was set and all changed files end in .md
echo "non_documentation_changes_found=false" >> "$GITHUB_OUTPUT"
fi

Run-integration-tests:
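A hedged restatement of the new check above, for clarity only: the workflow itself is bash, and this Python sketch (not code used anywhere in the repo) just spells out the decision it now makes when `github.event.before` is missing.

```python
# Hedged Python restatement of the bash check above; illustrative only,
# not used anywhere in the repository.
from typing import List


def non_documentation_changes_found(before_sha: str, changed_files: List[str]) -> bool:
    # If the event did not supply a usable `before` SHA, we cannot diff reliably,
    # so the job errs on the side of running the full checks.
    if not before_sha:
        return True
    # Otherwise run the checks only if at least one changed file is not Markdown.
    return any(not path.endswith(".md") for path in changed_files)


# Example: only README.md changed, so the integration tests can be skipped.
print(non_documentation_changes_found("abc123", ["README.md"]))  # False
```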
12 changes: 11 additions & 1 deletion requirements.txt
@@ -1,5 +1,15 @@
# Marqo's requirements files are stored in marqo-base repo: https://github.com/marqo-ai/marqo-base/tree/main/requirements
# Every time we upgrade dependencies, we need to build a new base image. This process often requires upgrading Vespa
# as well (since Vespa regularly removes old versions from their dns repo). In the long run, we'll merge the marqo-base repo
# back into the marqo repo and improve the image building process. In the short term, we will add new platform-agnostic
# dependencies (or upgrades of these dependencies) in this file. Please note that if a dependency is platform-relevant,
# meaning you need to use one version for arm64 and another for amd64, you still need to follow the process in
# the marqo-base repo and build a new base image.

msgpack==1.1.0
msgpack_numpy==0.4.8
python-json-logger==3.3.0
python-json-logger==3.3.0
pydantic==2.11.1
fastapi==0.115.12
fastapi-utils==0.8.0
uvicorn==0.34.0
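With pydantic now pinned to 2.11.1, the import rewrites in the rest of this PR rely on the `pydantic.v1` compatibility namespace that pydantic 2.x ships. A minimal sketch of that pattern, with an illustrative model name that is not part of the Marqo codebase:

```python
# Minimal sketch: v1-style models keep working against a pydantic 2.x install
# because the legacy API is re-exported under `pydantic.v1`.
from pydantic.v1 import BaseModel, Field, validator


class ExampleRequest(BaseModel):  # illustrative name, not a Marqo class
    thread_count: int = Field(default=4, alias="threadCount")

    @validator("thread_count")
    def check_positive(cls, value):
        if value <= 0:
            raise ValueError("threadCount must be positive")
        return value


# v1-style entry points such as parse_obj remain available.
req = ExampleRequest.parse_obj({"threadCount": 8})
print(req.thread_count)  # 8
```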
6 changes: 3 additions & 3 deletions src/marqo/api/models/add_docs_objects.py
@@ -1,8 +1,8 @@
from typing import List, Dict
from typing import Optional, Any, Sequence

from pydantic import BaseModel, root_validator
from pydantic import Field
from pydantic.v1 import BaseModel, root_validator
from pydantic.v1 import Field

from marqo.tensor_search.enums import EnvVars
from marqo.tensor_search.models.private_models import ModelAuth
@@ -25,7 +25,7 @@ class Config:
mappings: Optional[dict] = None
documents: Sequence[Dict[str, Any]]
imageDownloadThreadCount: int = Field(default_factory=lambda: read_env_vars_and_defaults_ints(EnvVars.MARQO_IMAGE_DOWNLOAD_THREAD_COUNT_PER_REQUEST))
mediaDownloadThreadCount: Optional[int]
mediaDownloadThreadCount: Optional[int] = None
textChunkPrefix: Optional[str] = None

@root_validator
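A hedged note on the `mediaDownloadThreadCount` change above (the PR does not state this rationale): under the v1 API an `Optional[int]` field with no default is implicitly optional with a default of `None`, whereas plain pydantic 2.x would treat it as required, so writing `= None` makes the intent explicit either way. Sketch with made-up model names:

```python
# Illustrative only; these model names are not from the Marqo codebase.
from typing import Optional

from pydantic import BaseModel as V2BaseModel      # pydantic 2.x behaviour
from pydantic.v1 import BaseModel as V1BaseModel   # legacy v1 behaviour


class V1Doc(V1BaseModel):
    media_threads: Optional[int]         # v1: implicitly optional, defaults to None


class V2Doc(V2BaseModel):
    media_threads: Optional[int] = None  # v2: Optional alone would be required


print(V1Doc().media_threads)  # None
print(V2Doc().media_threads)  # None
```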
5 changes: 2 additions & 3 deletions src/marqo/api/models/embed_request.py
@@ -5,8 +5,7 @@
"""
from typing import Union, List, Dict, Optional

import pydantic
from pydantic import Field, root_validator
from pydantic.v1 import Field, root_validator, validator

from marqo.base_model import MarqoBaseModel
from marqo.core.embed.embed import EmbedContentType
@@ -21,7 +20,7 @@ class EmbedRequest(MarqoBaseModel):
modelAuth: Optional[ModelAuth] = None
content_type: Optional[EmbedContentType] = Field(default=EmbedContentType.Query, alias="contentType")

@pydantic.validator('content')
@validator('content')
def validate_content(cls, value):
# Iterate through content list items
if (isinstance(value, list) or isinstance(value, dict)) and len(value) == 0:
2 changes: 1 addition & 1 deletion src/marqo/api/models/get_batch_documents_request.py
@@ -1,4 +1,4 @@
from pydantic import Field, conlist
from pydantic.v1 import Field, conlist

from marqo.base_model import MarqoBaseModel

2 changes: 1 addition & 1 deletion src/marqo/api/models/recommend_query.py
@@ -2,7 +2,7 @@

from marqo.core.models.interpolation_method import InterpolationMethod
from marqo.tensor_search.models.api_models import BaseMarqoModel
from pydantic import root_validator
from pydantic.v1 import root_validator
from marqo.tensor_search.models.score_modifiers_object import ScoreModifierLists


2 changes: 1 addition & 1 deletion src/marqo/api/models/update_documents.py
@@ -1,6 +1,6 @@
from typing import Dict, Any, List

from pydantic import validator
from pydantic.v1 import validator

from marqo.base_model import ImmutableStrictBaseModel
from marqo.api.exceptions import BadRequestError
2 changes: 1 addition & 1 deletion src/marqo/base_model.py
@@ -1,4 +1,4 @@
from pydantic import BaseModel
from pydantic.v1 import BaseModel


class MarqoBaseModel(BaseModel):
9 changes: 3 additions & 6 deletions src/marqo/core/embed/embed.py
@@ -2,27 +2,24 @@
from timeit import default_timer as timer
from typing import List, Optional, Union, Dict

import pydantic

import marqo.api.exceptions as api_exceptions
import marqo.s2_inference.errors as s2_inference_errors
from marqo import exceptions as base_exceptions
from marqo.core.index_management.index_management import IndexManagement
from marqo.core.inference.api import Inference
from marqo.tensor_search import utils
from marqo.logging import get_logger
from marqo.tensor_search.models.api_models import BulkSearchQueryEntity
from marqo.tensor_search.models.private_models import ModelAuth
from marqo.tensor_search.models.search import Qidx
from marqo.tensor_search.telemetry import RequestMetricsStore
from marqo.logging import get_logger
from marqo.vespa.vespa_client import VespaClient

logger = get_logger(__name__)


class EmbedContentType(str, Enum):
Query = "query"
Document = "document"


class Embed:
def __init__(self, vespa_client: VespaClient, index_management: IndexManagement, inference: Inference):
self.vespa_client = vespa_client
27 changes: 13 additions & 14 deletions src/marqo/core/inference/api/inference.py
@@ -1,9 +1,8 @@
from abc import ABC, abstractmethod
from typing import Optional, Dict, Any, List, Tuple, Union

import pydantic
from numpy import ndarray
from pydantic import StrictStr, root_validator
from pydantic.v1 import StrictStr, root_validator, Field

from marqo.base_model import ImmutableBaseModel
from marqo.core.inference.api import Modality, PreprocessingConfigType
@@ -12,21 +11,21 @@


class ModelConfig(ImmutableBaseModel):
model_name: StrictStr = pydantic.Field(alias='modelName')
model_properties: Optional[Dict[str, Any]] = pydantic.Field(default=None, alias='modelProperties')
model_auth: Optional[ModelAuth] = pydantic.Field(default=None, alias='modelAuth')
normalize_embeddings: bool = pydantic.Field(default=True, alias='normalizeEmbeddings')
model_name: StrictStr = Field(alias='modelName')
model_properties: Optional[Dict[str, Any]] = Field(default=None, alias='modelProperties')
model_auth: Optional[ModelAuth] = Field(default=None, alias='modelAuth')
normalize_embeddings: bool = Field(default=True, alias='normalizeEmbeddings')


class InferenceRequest(ImmutableBaseModel):
modality: Modality
contents: List[str] = pydantic.Field(min_items=1)
device: Optional[str] = pydantic.Field(default=None)
model_config: ModelConfig = pydantic.Field(alias='modelConfig')
preprocessing_config: PreprocessingConfigType = pydantic.Field(alias='preprocessingConfig')
use_inference_cache: bool = pydantic.Field(default=False, alias='useInferenceCache')
contents: List[str] = Field(min_items=1)
device: Optional[str] = Field(default=None)
model_config: ModelConfig = Field(alias='modelConfig')
preprocessing_config: PreprocessingConfigType = Field(alias='preprocessingConfig')
use_inference_cache: bool = Field(default=False, alias='useInferenceCache')
# whether we should return error for individual content, when set to false, any error should fail the whole batch
return_individual_error: bool = pydantic.Field(default=True, alias='returnIndividualError')
return_individual_error: bool = Field(default=True, alias='returnIndividualError')

@root_validator(pre=False)
def check_preprocessing_config_matches_modality(cls, values):
Expand All @@ -47,8 +46,8 @@ class InferenceErrorModel(ImmutableBaseModel):
"""
A model class to store error information for each individual content
"""
status_code: int = pydantic.Field(default=400)
error_code: str = pydantic.Field(default='inference_error')
status_code: int = Field(default=400)
error_code: str = Field(default='inference_error')
error_message: str


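One hedged observation about `InferenceRequest` above (not spelled out in the PR): it declares a field literally named `model_config`, which is an ordinary field name under the v1 API but a reserved configuration attribute in pydantic 2.x, so this model in particular depends on staying on `pydantic.v1`. Sketch with an invented class name:

```python
# Illustrative only; `SketchRequest` is not a real Marqo class.
from pydantic.v1 import BaseModel, Field


class SketchRequest(BaseModel):
    # Under pydantic.v1, `model_config` is treated as a normal field.
    model_config: dict = Field(default_factory=dict, alias="modelConfig")


req = SketchRequest.parse_obj({"modelConfig": {"modelName": "my-model"}})
print(req.model_config)  # {'modelName': 'my-model'}

# Declaring the same field against pydantic 2's BaseModel is expected to fail at
# class definition time, because v2 reserves `model_config` for model settings.
```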
39 changes: 19 additions & 20 deletions src/marqo/core/inference/api/preprocessing_config.py
@@ -1,8 +1,7 @@
from abc import ABC, abstractmethod
from typing import Optional, Dict, Literal, List, Set, Union
from abc import ABC
from typing import Optional, Dict, Literal, Union

import pydantic
from pydantic import root_validator
from pydantic.v1 import root_validator, Field

from marqo.base_model import ImmutableBaseModel
from marqo.core.inference.api.modality import Modality
@@ -11,12 +10,12 @@
class PreprocessingConfig(ImmutableBaseModel, ABC):
"""Parent class of preprocessing config for all modality types"""
modality: str
should_chunk: bool = pydantic.Field(default=False, alias='shouldChunk')
should_chunk: bool = Field(default=False, alias='shouldChunk')


class ChunkConfig(ImmutableBaseModel):
split_length: int = pydantic.Field(gt=0, alias='splitLength')
split_overlap: int = pydantic.Field(ge=0, alias='splitOverlap')
split_length: int = Field(gt=0, alias='splitLength')
split_overlap: int = Field(ge=0, alias='splitOverlap')

@root_validator
def check_split_length_greater_than_overlap(cls, values):
@@ -29,14 +28,14 @@ def check_split_length_greater_than_overlap(cls, values):


class TextChunkConfig(ChunkConfig):
split_method: Literal['character', 'word', 'sentence', 'passage'] = pydantic.Field(alias='splitMethod')
split_method: Literal['character', 'word', 'sentence', 'passage'] = Field(alias='splitMethod')


class TextPreprocessingConfig(PreprocessingConfig):
"""Preprocessing config for text modality"""
modality: Literal[Modality.TEXT] = Modality.TEXT
text_prefix: Optional[str] = pydantic.Field(default=None, alias='textPrefix')
chunk_config: Optional[TextChunkConfig] = pydantic.Field(default=None, alias='chunkConfig')
text_prefix: Optional[str] = Field(default=None, alias='textPrefix')
chunk_config: Optional[TextChunkConfig] = Field(default=None, alias='chunkConfig')

@root_validator
def validate_chunk_config(cls, values):
@@ -52,15 +51,15 @@ def validate_chunk_config(cls, values):
class ImagePreprocessingConfig(PreprocessingConfig):
"""Preprocessing config for image modality"""
modality: Literal[Modality.IMAGE] = Modality.IMAGE
download_timeout_ms: int = pydantic.Field(default=3000, alias='downloadTimeoutMs') # default to 3000ms
download_thread_count: Optional[int] = pydantic.Field(default=None, alias='downloadThreadCount')
download_header: Optional[Dict[str, str]] = pydantic.Field(default=None, alias='downloadHeader')
download_timeout_ms: int = Field(default=3000, alias='downloadTimeoutMs') # default to 3000ms
download_thread_count: Optional[int] = Field(default=None, alias='downloadThreadCount')
download_header: Optional[Dict[str, str]] = Field(default=None, alias='downloadHeader')

# image chunking TODO this is going away in future versions
patch_method: Optional[
# TODO check if we need to support all methods in image_processor.chunk_image method
Literal['simple', 'frcnn', 'dino-v1', 'dino-v2', 'marqo-yolo']
] = pydantic.Field(
] = Field(
default=None,
alias='patchMethod'
)
@@ -79,9 +78,9 @@ def validate_chunk_config(cls, values):
class AudioPreprocessingConfig(PreprocessingConfig):
"""Preprocessing config for audio modality"""
modality: Literal[Modality.AUDIO] = Modality.AUDIO
download_thread_count: Optional[int] = pydantic.Field(default=None, alias='downloadThreadCount')
download_header: Optional[Dict[str, str]] = pydantic.Field(default=None, alias='downloadHeader')
chunk_config: Optional[ChunkConfig] = pydantic.Field(default=None, alias='chunkConfig')
download_thread_count: Optional[int] = Field(default=None, alias='downloadThreadCount')
download_header: Optional[Dict[str, str]] = Field(default=None, alias='downloadHeader')
chunk_config: Optional[ChunkConfig] = Field(default=None, alias='chunkConfig')

@root_validator
def validate_chunk_config(cls, values):
Expand All @@ -97,9 +96,9 @@ def validate_chunk_config(cls, values):
class VideoPreprocessingConfig(PreprocessingConfig):
"""Preprocessing config for video modality"""
modality: Literal[Modality.VIDEO] = Modality.VIDEO
download_thread_count: Optional[int] = pydantic.Field(default=None, alias='downloadThreadCount')
download_header: Optional[Dict[str, str]] = pydantic.Field(default=None, alias='downloadHeader')
chunk_config: Optional[ChunkConfig] = pydantic.Field(default=None, alias='chunkConfig')
download_thread_count: Optional[int] = Field(default=None, alias='downloadThreadCount')
download_header: Optional[Dict[str, str]] = Field(default=None, alias='downloadHeader')
chunk_config: Optional[ChunkConfig] = Field(default=None, alias='chunkConfig')

@root_validator
def validate_chunk_config(cls, values):
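The bodies of the collapsed `root_validator` methods above are not shown in the diff; as a hedged sketch of the v1 pattern these classes use (field names mirror `ChunkConfig`, but the validator body is an assumption, not copied from Marqo):

```python
# Sketch of the pydantic.v1 cross-field validation pattern used above.
from pydantic.v1 import BaseModel, Field, ValidationError, root_validator


class ChunkConfigSketch(BaseModel):  # illustrative stand-in for ChunkConfig
    split_length: int = Field(gt=0, alias="splitLength")
    split_overlap: int = Field(ge=0, alias="splitOverlap")

    @root_validator
    def check_split_length_greater_than_overlap(cls, values):
        # `values` holds the already-validated fields keyed by their Python names.
        length, overlap = values.get("split_length"), values.get("split_overlap")
        if length is not None and overlap is not None and overlap >= length:
            raise ValueError("splitOverlap must be less than splitLength")
        return values


try:
    ChunkConfigSketch.parse_obj({"splitLength": 2, "splitOverlap": 5})
except ValidationError as err:
    print(err)  # splitOverlap must be less than splitLength
```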
2 changes: 1 addition & 1 deletion src/marqo/core/inference/tensor_fields_container.py
@@ -3,7 +3,7 @@
from typing import List, Dict, Set, Optional, Any, cast, Callable

import numpy as np
from pydantic import BaseModel
from pydantic.v1 import BaseModel

from marqo.core import constants
from marqo.core.constants import MARQO_DOC_ID
4 changes: 2 additions & 2 deletions src/marqo/core/models/add_docs_params.py
@@ -2,8 +2,8 @@
from typing import Optional, Union, Any, Sequence

import numpy as np
from pydantic import BaseModel, validator, root_validator
from pydantic import Field
from pydantic.v1 import BaseModel, validator, root_validator
from pydantic.v1 import Field

from marqo import marqo_docs
from marqo.api.exceptions import BadRequestError
5 changes: 4 additions & 1 deletion src/marqo/core/models/facets_parameters.py
@@ -1,6 +1,7 @@
from typing import List, Optional, Literal, Dict
from marqo.base_model import StrictBaseModel
from pydantic import Field, validator
from pydantic.v1 import Field, validator


class RangeConfiguration(StrictBaseModel):
class Config:
@@ -17,6 +18,7 @@ def validate_range(cls, to_value, values):
raise ValueError("'to' value must be greater than 'from' value")
return to_value


class FieldFacetsConfiguration(StrictBaseModel):
class Config:
allow_population_by_field_name = False # disable ability to pass max_results, only alias
@@ -63,6 +65,7 @@ def validate_ranges_used_only_for_number_field(cls, ranges, values):
raise ValueError("Ranges can only be used for 'number' facets")
return ranges


class FacetsParameters(StrictBaseModel):
class Config:
allow_population_by_field_name = False # disable ability to pass max_depth or max_results, only alias
2 changes: 1 addition & 1 deletion src/marqo/core/models/hybrid_parameters.py
@@ -2,7 +2,7 @@
from enum import Enum
from typing import List, Optional, Union

from pydantic import validator, root_validator
from pydantic.v1 import validator, root_validator

from marqo.base_model import StrictBaseModel
from marqo.tensor_search.models.score_modifiers_object import ScoreModifierLists
2 changes: 1 addition & 1 deletion src/marqo/core/models/marqo_add_documents_response.py
@@ -1,6 +1,6 @@
from typing import List, Optional, Any, Dict, Set

from pydantic import Field, root_validator
from pydantic.v1 import Field, root_validator

from marqo.base_model import MarqoBaseModel

@@ -1,6 +1,6 @@
from typing import Optional, Union, List, Dict, Any

from pydantic import Field, root_validator
from pydantic.v1 import Field, root_validator

from marqo.base_model import MarqoBaseModel
from marqo.core.models.marqo_add_documents_response import BatchResponseStats
15 changes: 9 additions & 6 deletions src/marqo/core/models/marqo_index.py
@@ -2,13 +2,13 @@
from abc import ABC, abstractmethod
from enum import Enum
from typing import List, Optional, Dict, Any, Set, Union

import pydantic
import semver
from pydantic import PrivateAttr, root_validator
from pydantic import ValidationError, validator
from pydantic.error_wrappers import ErrorWrapper
from pydantic.utils import ROOT_KEY

import pydantic.v1 as pydantic
from pydantic.v1 import PrivateAttr, root_validator
from pydantic.v1 import ValidationError, validator
from pydantic.v1.error_wrappers import ErrorWrapper
from pydantic.v1.utils import ROOT_KEY

from marqo.base_model import ImmutableStrictBaseModel, ImmutableBaseModel, StrictBaseModel
from marqo.core import constants
@@ -128,14 +128,17 @@ class TextPreProcessing(ImmutableStrictBaseModel):
split_overlap: int = pydantic.Field(ge=0, alias='splitOverlap')
split_method: TextSplitMethod = pydantic.Field(alias='splitMethod')


class VideoPreProcessing(ImmutableStrictBaseModel):
split_length: int = pydantic.Field(gt=0, alias='splitLength')
split_overlap: int = pydantic.Field(ge=0, alias='splitOverlap')


class AudioPreProcessing(ImmutableStrictBaseModel):
split_length: int = pydantic.Field(gt=0, alias='splitLength')
split_overlap: int = pydantic.Field(ge=0, alias='splitOverlap')


class ImagePreProcessing(ImmutableStrictBaseModel):
patch_method: Optional[PatchMethod] = pydantic.Field(alias='patchMethod')
