dragonflydb · dranikpg · Aug 18, 2023 · Aug 13, 2023 · Aug 17, 2023 · Aug 17, 2023
diff --git a/src/core/search/base.h b/src/core/search/base.h
@@ -1,11 +1,7 @@
 #pragma once
 
-#include <algorithm>
-#include <iostream>
-#include <memory>
-#include <ostream>
-#include <regex>
-#include <variant>
+#include <any>
+#include <string>
 #include <vector>
 
 #include "core/core_types.h"

diff --git a/src/core/search/search.cc b/src/core/search/search.cc
@@ -264,34 +264,34 @@ struct BasicSearch {
 }  // namespace
 
 FieldIndices::FieldIndices(Schema schema) : schema_{move(schema)}, all_ids_{}, indices_{} {
-  for (auto& [field, type] : schema_.fields) {
-    switch (type) {
-      case Schema::TAG:
-        indices_[field] = make_unique<TagIndex>();
+  for (const auto& [field_name, field_info] : schema_.fields) {  // build one index per schema field
+    switch (field_info.type) {  // NOTE(review): no default case, an unhandled FieldType gets no index
+      case SchemaField::TAG:
+        indices_[field_name] = make_unique<TagIndex>();  // indices_ is keyed by the schema field name
         break;
-      case Schema::TEXT:
-        indices_[field] = make_unique<TextIndex>();
+      case SchemaField::TEXT:
+        indices_[field_name] = make_unique<TextIndex>();
         break;
-      case Schema::NUMERIC:
-        indices_[field] = make_unique<NumericIndex>();
+      case SchemaField::NUMERIC:
+        indices_[field_name] = make_unique<NumericIndex>();
         break;
-      case Schema::VECTOR:
-        indices_[field] = make_unique<VectorIndex>();
+      case SchemaField::VECTOR:
+        indices_[field_name] = make_unique<VectorIndex>();
         break;
     }
   }
 }
 
 void FieldIndices::Add(DocId doc, DocumentAccessor* access) {
   for (auto& [field, index] : indices_) {
-    index->Add(doc, access, field);
+    index->Add(doc, access, schema_.fields.at(field).identifier);  // at(): read-only, never default-inserts
   }
   all_ids_.insert(upper_bound(all_ids_.begin(), all_ids_.end(), doc), doc);
 }
 
 void FieldIndices::Remove(DocId doc, DocumentAccessor* access) {
   for (auto& [field, index] : indices_) {
-    index->Remove(doc, access, field);
+    index->Remove(doc, access, schema_.fields[field].identifier);
   }
   auto it = lower_bound(all_ids_.begin(), all_ids_.end(), doc);
   CHECK(it != all_ids_.end() && *it == doc);
@@ -305,10 +305,10 @@ BaseIndex* FieldIndices::GetIndex(string_view field) const {
 
 std::vector<TextIndex*> FieldIndices::GetAllTextIndices() const {
   vector<TextIndex*> out;
-  for (auto& [field, type] : schema_.fields) {
-    if (type != Schema::TEXT)
+  for (auto& [field_name, field_info] : schema_.fields) {
+    if (field_info.type != SchemaField::TEXT)
       continue;
-    auto* index = dynamic_cast<TextIndex*>(GetIndex(field));
+    auto* index = dynamic_cast<TextIndex*>(GetIndex(field_name));
     DCHECK(index);
     out.push_back(index);
   }

diff --git a/src/core/search/search.h b/src/core/search/search.h
@@ -19,10 +19,15 @@ namespace dfly::search {
 struct AstNode;
 struct TextIndex;
 
-struct Schema {
+struct SchemaField {  // describes a single field of a Schema
   enum FieldType { TAG, TEXT, NUMERIC, VECTOR };
 
-  absl::flat_hash_map<std::string, FieldType> fields;
+  std::string identifier;  // passed to the field's index on Add/Remove in place of the map key
+  FieldType type;  // selects which index implementation FieldIndices creates for the field
+};
+
+struct Schema {
+  absl::flat_hash_map<std::string, SchemaField> fields;  // keyed by field name
 };
 
 // Collection of indices for all fields in schema

diff --git a/src/core/search/search_test.cc b/src/core/search/search_test.cc
@@ -68,10 +68,18 @@ struct MockedDocument : public DocumentAccessor {
   Map fields_{};
 };
 
+// Test helper: builds a Schema in which every field's identifier is the same as its name.
+Schema MakeSimpleSchema(initializer_list<pair<string_view, SchemaField::FieldType>> ilist) {
+  Schema result;
+  for (const auto& entry : ilist)
+    result.fields[entry.first] = SchemaField{string{entry.first}, entry.second};
+  return result;
+}
+
 class SearchParserTest : public ::testing::Test {
  protected:
   SearchParserTest() {
-    PrepareSchema();
+    PrepareSchema({{"field", SchemaField::TEXT}});  // default schema: a single TEXT field named "field"
   }
 
   ~SearchParserTest() {
@@ -82,8 +90,8 @@ class SearchParserTest : public ::testing::Test {
     params_.knn_vec = vec;
   }
 
-  void PrepareSchema(Schema schema = {{{"field", Schema::TEXT}}}) {
-    schema_ = schema;
+  void PrepareSchema(initializer_list<pair<string_view, SchemaField::FieldType>> ilist) {
+    schema_ = MakeSimpleSchema(ilist);  // replaces schema_; each field's identifier equals its name
   }
 
   void PrepareQuery(string_view query) {
@@ -248,7 +256,7 @@ TEST_F(SearchParserTest, CheckParenthesisPriority) {
 using Map = MockedDocument::Map;
 
 TEST_F(SearchParserTest, MatchField) {
-  PrepareSchema({{{"f1", Schema::TEXT}, {"f2", Schema::TEXT}, {"f3", Schema::TEXT}}});
+  PrepareSchema({{"f1", SchemaField::TEXT}, {"f2", SchemaField::TEXT}, {"f3", SchemaField::TEXT}});
   PrepareQuery("@f1:foo @f2:bar @f3:baz");
 
   ExpectAll(Map{{"f1", "foo"}, {"f2", "bar"}, {"f3", "baz"}});
@@ -260,7 +268,7 @@ TEST_F(SearchParserTest, MatchField) {
 }
 
 TEST_F(SearchParserTest, MatchRange) {
-  PrepareSchema({{{"f1", Schema::NUMERIC}, {"f2", Schema::NUMERIC}}});
+  PrepareSchema({{"f1", SchemaField::NUMERIC}, {"f2", SchemaField::NUMERIC}});
   PrepareQuery("@f1:[1 10] @f2:[50 100]");
 
   ExpectAll(Map{{"f1", "5"}, {"f2", "50"}}, Map{{"f1", "1"}, {"f2", "100"}},
@@ -277,7 +285,7 @@ TEST_F(SearchParserTest, MatchStar) {
 }
 
 TEST_F(SearchParserTest, CheckExprInField) {
-  PrepareSchema({{{"f1", Schema::TEXT}, {"f2", Schema::TEXT}, {"f3", Schema::TEXT}}});
+  PrepareSchema({{"f1", SchemaField::TEXT}, {"f2", SchemaField::TEXT}, {"f3", SchemaField::TEXT}});
   {
     PrepareQuery("@f1:(a|b) @f2:(c d) @f3:-e");
 
@@ -300,7 +308,7 @@ TEST_F(SearchParserTest, CheckExprInField) {
 }
 
 TEST_F(SearchParserTest, CheckTag) {
-  PrepareSchema({{{"f1", Schema::TAG}, {"f2", Schema::TAG}}});
+  PrepareSchema({{"f1", SchemaField::TAG}, {"f2", SchemaField::TAG}});
 
   PrepareQuery("@f1:{red | blue} @f2:{circle | square}");
 
@@ -316,7 +324,7 @@ TEST_F(SearchParserTest, CheckTag) {
 }
 
 TEST_F(SearchParserTest, SimpleKnn) {
-  Schema schema{{{"even", Schema::TAG}, {"pos", Schema::VECTOR}}};
+  auto schema = MakeSimpleSchema({{"even", SchemaField::TAG}, {"pos", SchemaField::VECTOR}});
   FieldIndices indices{schema};
 
   // Place points on a straight line
@@ -366,7 +374,7 @@ TEST_F(SearchParserTest, Simple2dKnn) {
   // 0      1
   const pair<float, float> kTestCoords[] = {{0, 0}, {1, 0}, {1, 1}, {0, 1}, {0.5, 0.5}};
 
-  Schema schema{{{"pos", Schema::VECTOR}}};
+  auto schema = MakeSimpleSchema({{"pos", SchemaField::VECTOR}});
   FieldIndices indices{schema};
 
   for (size_t i = 0; i < ABSL_ARRAYSIZE(kTestCoords); i++) {

diff --git a/src/server/search/doc_accessors.cc b/src/server/search/doc_accessors.cc
@@ -8,6 +8,7 @@
 #include <absl/strings/str_join.h>
 
 #include <jsoncons/json.hpp>
+#include <jsoncons_ext/jsonpath/jsonpath.hpp>
 
 #include "core/json_object.h"
 #include "core/search/search.h"
@@ -55,7 +56,7 @@ SearchDocData ListPackAccessor::Serialize(search::Schema schema) const {
     string_view v = container_utils::LpGetView(fptr, intbuf_[1].data());
     fptr = lpNext(lp_, fptr);
 
-    if (schema.fields.at(k) == search::Schema::VECTOR)
+    if (schema.fields.at(k).type == search::SchemaField::VECTOR)
       out[k] = FtVectorToString(GetVector(k));
     else
       out[k] = v;
@@ -78,7 +79,7 @@ SearchDocData StringMapAccessor::Serialize(search::Schema schema) const {
     string_view k = SdsToSafeSv(kptr);
     string_view v = SdsToSafeSv(vptr);
 
-    if (schema.fields.at(k) == search::Schema::VECTOR)
+    if (schema.fields.at(k).type == search::SchemaField::VECTOR)
       out[k] = FtVectorToString(GetVector(k));
     else
       out[k] = v;
@@ -87,18 +88,47 @@ SearchDocData StringMapAccessor::Serialize(search::Schema schema) const {
   return out;
 }
 
+struct JsonAccessor::JsonPathContainer : public jsoncons::jsonpath::jsonpath_expression<JsonType> {
+};  // adds no members; presumably exists so the header can forward-declare it (TODO confirm)
+
 string_view JsonAccessor::GetString(string_view active_field) const {
-  buf_ = json_->get_value_or<string>(active_field, string{});
+  auto res = GetPath(active_field)->evaluate(*json_);  // active_field is evaluated as a json path
+  DCHECK(res.is_array());
+  if (res.empty())  // the path matched nothing
+    return "";
+  buf_ = res[0].as_string();  // first match only; buf_ owns the text behind the returned view
   return buf_;
 }
 
 search::FtVector JsonAccessor::GetVector(string_view active_field) const {
+  auto res = GetPath(active_field)->evaluate(*json_);  // active_field is evaluated as a json path
+  DCHECK(res.is_array());
+  if (res.empty())  // the path matched nothing: report an empty vector
+    return {};
+
   search::FtVector out;
-  for (auto v : json_->at(active_field).array_range())
+  for (const auto& v : res[0].array_range())  // first match only; by reference, no per-element copy
     out.push_back(v.as<float>());
   return out;
 }
 
+// Returns the parsed json path for `field`, building and caching it on first use.
+JsonAccessor::JsonPathContainer* JsonAccessor::GetPath(std::string_view field) const {
+  if (auto cached = path_cache_.find(field); cached != path_cache_.end())
+    return cached->second.get();
+
+  error_code ec;
+  auto parsed = jsoncons::jsonpath::make_expression<JsonType>(field, ec);
+  DCHECK(!ec) << "missing validation on ft.create step";
+
+  // Wrap the parsed expression in the container type and move it to the heap.
+  auto owned = make_unique<JsonPathContainer>(JsonPathContainer{move(parsed)});
+  // The cache keeps ownership; callers only borrow the raw pointer.
+  JsonPathContainer* borrowed = owned.get();
+  path_cache_[field] = move(owned);
+  return borrowed;
+}
+
 SearchDocData JsonAccessor::Serialize(search::Schema schema) const {
   SearchDocData out{};
   for (const auto& member : json_->object_range()) {
@@ -107,6 +137,9 @@ SearchDocData JsonAccessor::Serialize(search::Schema schema) const {
   return out;
 }
 
+thread_local absl::flat_hash_map<std::string, std::unique_ptr<JsonAccessor::JsonPathContainer>>
+    JsonAccessor::path_cache_;  // one cache per thread; NOTE(review): no eviction visible here
+
 unique_ptr<BaseAccessor> GetAccessor(const DbContext& db_cntx, const PrimeValue& pv) {
   DCHECK(pv.ObjType() == OBJ_HASH || pv.ObjType() == OBJ_JSON);
 

diff --git a/src/server/search/doc_accessors.h b/src/server/search/doc_accessors.h
@@ -55,6 +55,8 @@ struct StringMapAccessor : public BaseAccessor {
 
 // Accessor for json values
 struct JsonAccessor : public BaseAccessor {
+  struct JsonPathContainer;  // contains jsoncons::jsonpath::jsonpath_expression
+
   explicit JsonAccessor(JsonType* json) : json_{json} {
   }
 
@@ -63,8 +65,14 @@ struct JsonAccessor : public BaseAccessor {
   SearchDocData Serialize(search::Schema schema) const override;
 
  private:
-  mutable std::string buf_;
+  JsonPathContainer* GetPath(std::string_view field) const;
+
   JsonType* json_;
+  mutable std::string buf_;
+
+  // Contains built json paths to avoid parsing them repeatedly
+  static thread_local absl::flat_hash_map<std::string, std::unique_ptr<JsonPathContainer>>
+      path_cache_;
 };
 
 // Get accessor for value