marqo-ai
diff --git a/‎src/marqo/core/index_management/vespa_application_package.py
Lines changed: 39 additions & 3 deletions b/‎src/marqo/core/index_management/vespa_application_package.py
Lines changed: 39 additions & 3 deletions
diff --git a/‎src/marqo/core/monitoring/statsd_middleware.py
Lines changed: 29 additions & 47 deletions b/‎src/marqo/core/monitoring/statsd_middleware.py
Lines changed: 29 additions & 47 deletions
diff --git a/‎src/marqo/core/search/recommender.py
Lines changed: 2 additions & 5 deletions b/‎src/marqo/core/search/recommender.py
Lines changed: 2 additions & 5 deletions
diff --git a/‎src/marqo/tensor_search/models/search.py
Lines changed: 0 additions & 1 deletion b/‎src/marqo/tensor_search/models/search.py
Lines changed: 0 additions & 1 deletion
diff --git a/‎src/marqo/tensor_search/tensor_search.py
Lines changed: 2 additions & 5 deletions b/‎src/marqo/tensor_search/tensor_search.py
Lines changed: 2 additions & 5 deletions
diff --git a/‎src/marqo/version.py
Lines changed: 1 addition & 1 deletion b/‎src/marqo/version.py
Lines changed: 1 addition & 1 deletion
diff --git a/‎tests/integ_tests/core/index_management/test_index_management.py
Lines changed: 46 additions & 0 deletions b/‎tests/integ_tests/core/index_management/test_index_management.py
Lines changed: 46 additions & 0 deletions
diff --git a/‎tests/integ_tests/core/monitoring/test_metrics_udp.py
Lines changed: 6 additions & 8 deletions b/‎tests/integ_tests/core/monitoring/test_metrics_udp.py
Lines changed: 6 additions & 8 deletions
diff --git a/‎tests/integ_tests/tensor_search/search/test_search_with_context.py
Lines changed: 0 additions & 53 deletions b/‎tests/integ_tests/tensor_search/search/test_search_with_context.py
Lines changed: 0 additions & 53 deletions
diff --git a/‎tests/integ_tests/tensor_search/test_api_query_logging_integration.py
Lines changed: 2 additions & 4 deletions b/‎tests/integ_tests/tensor_search/test_api_query_logging_integration.py
Lines changed: 2 additions & 4 deletions
@@ -81,16 +81,52 @@ def _cleanup_container_config(self):
         """
         Components config needs to be in sync with the components in the jar files. This method cleans up the
         custom components config, so we can always start fresh. This assumes that the container section of the
-        services.xml file only has `node` config and empty `document-api` and `search` elements initially. Please
-        note that any manual config change in container section will be overwritten.
+        services.xml file only has empty `document-api`, `document-processing`, `search` elements, and the preserved
+        elements initially.
+
+        Please note that:
+        - Any manual config change to non-preserved elements in the container section will be overwritten during Vespa
+        bootstrapping (when a new version of Marqo is deployed).
+        - Manual rollback will replace the entire services.xml file with the previous version. This means the changes
+        to the preserved elements will also be reverted.
+        
+        Preserved elements:
+        - <nodes>...</nodes>: container nodes configuration
+        - <config name="com.yahoo.document.restapi.document-operation-executor">...</config>: doc operation executors
         """
         container_element = self._ensure_only_one('container')
         for child in container_element.findall('*'):
+            if self._should_preserve_container_element(child):
+                continue
+
             if child.tag in ['document-api', 'document-processing', 'search']:
+                # clear the children of these elements to add config
                 child.clear()
-            elif child.tag != 'nodes':
+            else:
+                # clean up other components
                 container_element.remove(child)
 
+    def _should_preserve_container_element(self, element):
+        """
+        Decide whether `_cleanup_container_config` must leave a child of the container element untouched.
+        
+        Args:
+            element: child XML element of the container section; only its tag and `name` attribute are read
+            
+        Returns:
+            bool: True for `nodes` and for the document-operation-executor `config`, False for anything else
+        """
+        # The nodes element is always kept, regardless of its attributes
+        if element.tag == 'nodes':
+            return True
+            
+        # Among <config> elements, only the one named for the document-operation-executor is kept
+        if (element.tag == 'config' and 
+            element.get('name') == 'com.yahoo.document.restapi.document-operation-executor'):
+            return True
+            
+        return False
+
     def _config_search(self):
         search_elements = self._ensure_only_one('container/search')
         chain = ET.SubElement(search_elements, 'chain')
 
@@ -8,22 +8,18 @@
 
 from marqo.core.monitoring.statsd_client import StatsDClient
 
-_SEARCH_RE = re.compile(r"/indexes/[^/]+/search$")
 _DOCS_RE = re.compile(r"/indexes/[^/]+/documents$")
 _DOCUMENT_ID_RE = re.compile(r"(/documents/)[^/]+")
 
 
 class StatsDMiddleware(BaseHTTPMiddleware):
     """
-    Emits the former Reverse-Proxy (RP) CloudWatch metrics via StatsD.
+    Emits high-cardinality generic metrics.
 
-    Metrics implemented (parity with RP):
-      • requests.completed                counter   • status_code
-                                              also  • path, method, status_code
-      • marqo_processing_time             timing    (no tags)
-      • search_processing_time            timing    (no tags)
-      • index_processing_time             timing    (no tags)
-      • x-count-success / -failure / -error  counter • method
+    • request.duration_ms   |ms  path,method,status_code
+    • batch.success         |c   path,method,status_code
+    • batch.failure         |c   path,method,status_code
+    • batch.error           |c   path,method,status_code
     """
 
     def __init__(self, app, statsd_client: StatsDClient):
@@ -38,44 +34,30 @@ async def dispatch(self, request: Request, call_next):
         response: Response = await call_next(request)
         duration_ms = int((time.perf_counter() - t_start) * 1000)
 
-        status = response.status_code
-        status_tag = f"{status // 100}XX"      # 2XX / 3XX / 4XX / 5XX
-
-        # --- requests.completed (status-only) ------------------------
-        self.statsd.increment("requests.completed", tags={"status_code": status_tag})
-
-        # --- requests.completed (path/method/status variant) ---------
-        sanitized_path = self._sanitize_path(request.url.path)
-        self.statsd.increment(
-            "requests.completed",
-            tags={
-                "path": sanitized_path,
-                "method": request.method,
-                "status_code": status_tag,
-            },
-        )
-
-        # --- marqo_processing_time -----------------------------
-        self.statsd.timing("marqo_processing_time", duration_ms)
-
-        # --- search_processing_time ----------------------------
-        if _SEARCH_RE.fullmatch(request.url.path):
-            self.statsd.timing("search_processing_time", duration_ms)
-
-        # --- index_processing_time and x-count-* counters -------
-        if _DOCS_RE.fullmatch(request.url.path):
-            if request.method in {"POST", "PATCH"}:
-                self.statsd.timing("index_processing_time", duration_ms)
-
-            if request.method in {"POST", "PATCH", "GET"}:
-                lowered: Dict[str, str] = {k.lower(): v for k, v in response.headers.items()}
-                for hdr in ("x-count-success", "x-count-failure", "x-count-error"):
-                    if hdr in lowered:
-                        try:
-                            self.statsd.increment(hdr, int(lowered[hdr]), tags={"method": request.method})
-                        except ValueError:
-                            # Header value wasn’t an int – ignore
-                            pass
+        path_tag = self._sanitize_path(request.url.path)
+        tags = {
+            "path": path_tag,
+            "method": request.method,
+            "status_code": str(response.status_code),
+        }
+
+        # latency
+        self.statsd.timing("request.duration_ms", duration_ms, tags=tags)
+
+        # batch outcome counters
+        if _DOCS_RE.fullmatch(request.url.path) and request.method in {"POST", "PATCH", "GET"}:
+            lowered: Dict[str, str] = {k.lower(): v for k, v in response.headers.items()}
+            for hdr, metric in (
+                    ("x-count-success", "batch.success"),
+                    ("x-count-failure", "batch.failure"),
+                    ("x-count-error", "batch.error"),
+            ):
+                if hdr in lowered:
+                    try:
+                        self.statsd.increment(metric, int(lowered[hdr]), tags=tags)
+                    except ValueError:
+                        # Header value wasn’t an int – ignore
+                        pass
 
         return response
 
 
@@ -26,8 +26,7 @@ def __init__(self, vespa_client: VespaClient, index_management: IndexManagement,
     def get_doc_vectors_from_ids(self,
                   index_name: str,
                   documents: Union[List[str], Dict[str, float]],
-                  tensor_fields: Optional[List[str]] = None,
-                  concurrency: Optional[int] = None) -> Dict[str, List[List[float]]]:
+                  tensor_fields: Optional[List[str]] = None) -> Dict[str, List[List[float]]]:
         """
         This method gets documents from Vespa using their IDs, removes any unnecessary data, checks for
         lack of vectors, then returns a list of document vectors. Can be used internally (in recommend)
@@ -37,7 +36,6 @@ def get_doc_vectors_from_ids(self,
             index_name: Name of the index to search
             documents: A list of document IDs or a dictionary where the keys are document IDs and the values are weights
             tensor_fields: List of tensor fields to use for recommendation (can include text, image, audio, and video fields)
-            concurrency: Max number of concurrent requests to use when fetching documents by batch
 
         Returns:
             A dictionary mapping document IDs to lists of vector embeddings. This is flattened to 1 list per document
@@ -90,8 +88,7 @@ def get_doc_vectors_from_ids(self,
             config.Config(self.vespa_client, inference=self.inference),
             index_name, 
             document_ids, 
-            tensor_fields=tensor_fields,
-            concurrency=concurrency
+            tensor_fields=tensor_fields
         )
 
         # Check that all documents were found
 
@@ -69,7 +69,6 @@ class SearchContextTensor(BaseModel):
 class SearchContextDocumentsParameters(BaseModel):
     tensor_fields: Optional[List[str]] = Field(None, alias='tensorFields')
     exclude_input_documents: bool = Field(True, alias='excludeInputDocuments')
-    concurrency: Optional[int] = None
 
     @validator('tensor_fields', pre=True, always=True)
     def check_tensor_fields_not_empty(cls, v):
 
@@ -943,8 +943,7 @@ def get_query_vectors_from_jobs(
                                             context_doc_vectors = config.recommender.get_doc_vectors_from_ids(
                             index_name=q.index.name,
                             documents=context_documents.ids,
-                            tensor_fields=context_documents.parameters.tensor_fields,
-                            concurrency=context_documents.parameters.concurrency
+                            tensor_fields=context_documents.parameters.tensor_fields
                         )
 
                 # Update weights and vectors list
@@ -1354,7 +1353,6 @@ def get_doc_vectors_per_tensor_field_by_ids(
     index_name: str, 
     document_ids: List[str],
     tensor_fields: Optional[List[str]] = None,
-    concurrency: Optional[int] = None
 ) -> Dict[str, Dict[str, List[List[float]]]]:
     """
     Get only the embeddings for documents by their IDs.
@@ -1383,8 +1381,7 @@ def get_doc_vectors_per_tensor_field_by_ids(
         batch_get = config.vespa_client.get_batch(
             document_ids,
             marqo_index.schema_name,
-            fields=fields_to_retrieve,
-            concurrency=concurrency
+            fields=fields_to_retrieve
         )
 
     vespa_index = vespa_index_factory(marqo_index)
 
@@ -1,4 +1,4 @@
-__version__ = "2.22.0"
+__version__ = "2.22.1"
 
 def get_version() -> str:
     return f"{__version__}"
@@ -219,6 +219,52 @@ def test_bootstrap_vespa_should_override_and_backup_configs(self):
                 os.path.join(self._test_dir, 'existing_vespa_app', *file)
             )
 
+    def test_bootstrap_vespa_should_preserve_document_operation_executor_config(self):
+        """
+        Verify that a document-operation-executor config added to services.xml by hand is still present
+        after bootstrap_vespa() runs again, and that the Marqo components are configured as well.
+        """
+        # Deploy initial app package
+        self._deploy_initial_app_package()
+        
+        # First bootstrap to get Marqo configured
+        self.index_management.bootstrap_vespa()
+        
+        # Read back the deployed services.xml so a config can be added by hand (simulates Cloud team changes)
+        app = self.index_management._get_vespa_application()
+        services_xml_content = app._store.read_text_file('services.xml')
+        
+        # Append the config block right after every closing </search> tag (str.replace rewrites all matches)
+        # NOTE(review): presumably only the container section has a </search> tag - confirm against services.xml
+        services_xml_with_config = services_xml_content.replace(
+            '</search>',
+            '''</search>
+            <config name="com.yahoo.document.restapi.document-operation-executor">
+                <maxThrottled>0</maxThrottled>
+            </config>'''
+        )
+        
+        app._store.save_file(services_xml_with_config, 'services.xml')
+        app._deploy()
+        
+        # Bootstrap Marqo again - this should preserve the document-operation-executor config
+        self.index_management.bootstrap_vespa()
+        
+        # Download the deployed application package and read its services.xml as plain text
+        bootstrapped_app = str(self.vespa_client.download_application())
+        services_xml_path = os.path.join(bootstrapped_app, 'services.xml')
+        
+        with open(services_xml_path, 'r') as f:
+            final_services_xml = f.read()
+        
+        # Both the config element and its maxThrottled value must appear in the final services.xml
+        self.assertIn('config name="com.yahoo.document.restapi.document-operation-executor"', final_services_xml)
+        self.assertIn('<maxThrottled>0</maxThrottled>', final_services_xml)
+        
+        # Also verify that Marqo components were properly added
+        self.assertIn('ai.marqo.search.HybridSearcher', final_services_xml)
+        self.assertIn('ai.marqo.index.IndexSettingRequestHandler', final_services_xml)
+
     def test_rollback_should_succeed(self):
         self._deploy_existing_app_package()
         self.index_management.bootstrap_vespa()
 
@@ -144,14 +144,12 @@ def test_metrics_roundtrip(self):
         self.client.get("/indexes/foo/documents/abc123")  # redaction
 
         patterns = [
-            r"marqo_processing_time:\d+\|ms",
-            r"requests\.completed:1\|c\|#status_code:\dXX",
-            r"search_processing_time:\d+\|ms",
-            r"index_processing_time:\d+\|ms",
-            r"x-count-success:\d+\|c",
-            r"x-count-failure:\d+\|c",
-            r"x-count-error:\d+\|c",
-            r"requests\.completed:1\|c\|#path:/indexes/foo/documents(?:/<document_id>)?,method:(?:GET|POST),status_code:\dXX",
+            r"request\.duration_ms:\d+\|ms\|#path:/indexes/foo/search,method:GET,status_code:200",
+            r"request\.duration_ms:\d+\|ms\|#path:/indexes/foo/documents,method:POST,status_code:200",
+            r"request\.duration_ms:\d+\|ms\|#path:/indexes/foo/documents/<document_id>,method:GET,status_code:200",
+            r"batch\.success:1\|c\|#path:/indexes/foo/documents,method:POST,status_code:200",
+            r"batch\.failure:0\|c\|#path:/indexes/foo/documents,method:POST,status_code:200",
+            r"batch\.error:0\|c\|#path:/indexes/foo/documents,method:POST,status_code:200",
         ]
 
         # Wait until the six packets we assert on have arrived
 
@@ -933,59 +933,6 @@ def mock_get_batch(*args, **kwargs):
                             # But we should still have embeddings
                             self.assertGreater(len(doc_embeddings), 0)
 
-    def test_search_with_context_documents_concurrency_parameter_controls_vespa_concurrency(self):
-        """Test that context.documents.parameters.concurrency is passed to vespa_client.get_batch."""
-        index = self.structured_default_text_index
-        
-        # Add documents to the index
-        docs = [
-            {"_id": "doc1", "text_field_1": "Test document 1"},
-            {"_id": "doc2", "text_field_1": "Test document 2"}
-        ]
-
-        self.add_documents(
-            config=self.config,
-            add_docs_params=AddDocsParams(
-                index_name=index.name,
-                docs=docs,
-                tensor_fields=None
-            )
-        )
-
-        # Mock vespa_client.get_batch to capture the concurrency parameter
-        original_get_batch = self.config.vespa_client.get_batch
-        captured_concurrency = []
-
-        def mock_get_batch(*args, **kwargs):
-            captured_concurrency.append(kwargs.get('concurrency'))
-            return original_get_batch(*args, **kwargs)
-
-        with mock.patch.object(self.config.vespa_client, 'get_batch', side_effect=mock_get_batch):
-            # Create search context with specific concurrency
-            search_context = SearchContext(
-                documents=SearchContextDocuments(
-                    ids={"doc1": 1.0, "doc2": 1.0},
-                    parameters=SearchContextDocumentsParameters(
-                        tensorFields=["text_field_1"],
-                        excludeInputDocuments=False,
-                        concurrency=5  # Test with concurrency=5
-                    )
-                )
-            )
-
-            # Perform search with context documents
-            tensor_search.search(
-                config=self.config,
-                index_name=index.name,
-                text=None,
-                context=search_context,
-                result_count=5
-            )
-
-            # Verify that get_batch was called with the correct concurrency parameter
-            self.assertEqual(len(captured_concurrency), 1, "get_batch should have been called")
-            self.assertEqual(captured_concurrency[0], 5, "get_batch should be called with concurrency=5")
-
     def test_search_with_context_documents_max_search_context_docs_env_var(self):
         """Test that MARQO_MAX_SEARCH_CONTEXT_DOCS environment variable controls the limit for context documents."""
 
 
@@ -170,8 +170,7 @@ def test_slow_query_logging_all_fields_sanitised_excluding_secret_fields(self):
                     },
                     "parameters": {
                         "tensorFields": ["text_field_1"],
-                        "excludeInputDocuments": False,
-                        "concurrency": 5,
+                        "excludeInputDocuments": False
                     }
                 }
             },
@@ -251,8 +250,7 @@ def test_slow_query_logging_all_fields_sanitised_excluding_secret_fields(self):
                     },
                     "parameters": {
                         "tensorFields": ["text_field_1"],
-                        "excludeInputDocuments": False,
-                        "concurrency": 5,
+                        "excludeInputDocuments": False
                     }
                 }
             },
Original file line number	Diff line number	Diff line change
`@@ -170,8 +170,7 @@ def test_slow_query_logging_all_fields_sanitised_excluding_secret_fields(self):`
`170`	`170`	`},`
`171`	`171`	`"parameters": {`
`172`	`172`	`"tensorFields": ["text_field_1"],`
`173`		`- "excludeInputDocuments": False,`
`174`		`- "concurrency": 5,`
	`173`	`+ "excludeInputDocuments": False`
`175`	`174`	`}`
`176`	`175`	`}`
`177`	`176`	`},`
`@@ -251,8 +250,7 @@ def test_slow_query_logging_all_fields_sanitised_excluding_secret_fields(self):`
`251`	`250`	`},`
`252`	`251`	`"parameters": {`
`253`	`252`	`"tensorFields": ["text_field_1"],`
`254`		`- "excludeInputDocuments": False,`
`255`		`- "concurrency": 5,`
	`253`	`+ "excludeInputDocuments": False`
`256`	`254`	`}`
`257`	`255`	`}`
`258`	`256`	`},`