-
Notifications
You must be signed in to change notification settings - Fork 1.1k
feat: json paths in search #1695
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ | |
#include <absl/strings/str_join.h> | ||
|
||
#include <jsoncons/json.hpp> | ||
#include <jsoncons_ext/jsonpath/jsonpath.hpp> | ||
|
||
#include "core/json_object.h" | ||
#include "core/search/search.h" | ||
|
@@ -55,7 +56,7 @@ SearchDocData ListPackAccessor::Serialize(search::Schema schema) const { | |
string_view v = container_utils::LpGetView(fptr, intbuf_[1].data()); | ||
fptr = lpNext(lp_, fptr); | ||
|
||
if (schema.fields.at(k) == search::Schema::VECTOR) | ||
if (schema.fields.at(k).type == search::SchemaField::VECTOR) | ||
out[k] = FtVectorToString(GetVector(k)); | ||
else | ||
out[k] = v; | ||
|
@@ -78,7 +79,7 @@ SearchDocData StringMapAccessor::Serialize(search::Schema schema) const { | |
string_view k = SdsToSafeSv(kptr); | ||
string_view v = SdsToSafeSv(vptr); | ||
|
||
if (schema.fields.at(k) == search::Schema::VECTOR) | ||
if (schema.fields.at(k).type == search::SchemaField::VECTOR) | ||
out[k] = FtVectorToString(GetVector(k)); | ||
else | ||
out[k] = v; | ||
|
@@ -87,18 +88,47 @@ SearchDocData StringMapAccessor::Serialize(search::Schema schema) const { | |
return out; | ||
} | ||
|
||
struct JsonAccessor::JsonPathContainer : public jsoncons::jsonpath::jsonpath_expression<JsonType> { | ||
}; | ||
|
||
string_view JsonAccessor::GetString(string_view active_field) const { | ||
buf_ = json_->get_value_or<string>(active_field, string{}); | ||
auto res = GetPath(active_field)->evaluate(*json_); | ||
dranikpg marked this conversation as resolved.
Show resolved
Hide resolved
|
||
DCHECK(res.is_array()); | ||
if (res.empty()) | ||
return ""; | ||
buf_ = res[0].as_string(); | ||
return buf_; | ||
} | ||
|
||
search::FtVector JsonAccessor::GetVector(string_view active_field) const { | ||
auto res = GetPath(active_field)->evaluate(*json_); | ||
DCHECK(res.is_array()); | ||
if (res.empty()) | ||
return {}; | ||
|
||
search::FtVector out; | ||
for (auto v : json_->at(active_field).array_range()) | ||
for (auto v : res[0].array_range()) | ||
out.push_back(v.as<float>()); | ||
return out; | ||
} | ||
|
||
JsonAccessor::JsonPathContainer* JsonAccessor::GetPath(std::string_view field) const { | ||
if (auto it = path_cache_.find(field); it != path_cache_.end()) { | ||
return it->second.get(); | ||
} else { | ||
dranikpg marked this conversation as resolved.
Show resolved
Hide resolved
|
||
error_code ec; | ||
auto path_expr = jsoncons::jsonpath::make_expression<JsonType>(field, ec); | ||
DCHECK(!ec) << "missing validation on ft.create step"; | ||
|
||
JsonPathContainer path_container{move(path_expr)}; | ||
auto ptr = make_unique<JsonPathContainer>(move(path_container)); | ||
|
||
JsonPathContainer* path = ptr.get(); | ||
path_cache_[field] = move(ptr); | ||
return path; | ||
} | ||
} | ||
|
||
SearchDocData JsonAccessor::Serialize(search::Schema schema) const { | ||
SearchDocData out{}; | ||
for (const auto& member : json_->object_range()) { | ||
|
@@ -107,6 +137,9 @@ SearchDocData JsonAccessor::Serialize(search::Schema schema) const { | |
return out; | ||
} | ||
|
||
thread_local absl::flat_hash_map<std::string, std::unique_ptr<JsonAccessor::JsonPathContainer>> | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why do we need [inline code lost in extraction; the reply below suggests the forward-declared JsonPathContainer]? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't want to pull json dependencies into the header, so I forward-declared JsonPathContainer. Actually, pulling in json deps is not crucial, as this header is used only in search-internal files 🤔 There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Having global caches with no eviction and no limit seems dangerous, memory-wise. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Those are field names defined by developers. Each schema usually has a few fields, and there are usually no more than a few indices per database. Having them occupy kilobytes of memory would require developers to type out kilobytes of field names, which is really unlikely. We can clear this cache once indices are deleted. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ah, they can't query for arbitrary fields outside of established indices? Somehow I thought it's possible but just slower. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. No, it's only used for accessing an index, and that index must have been explicitly described by the user. |
||
JsonAccessor::path_cache_; | ||
|
||
Comment on lines
+144
to
+146
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I've chosen to use this thread-local cache instead of polluting the core json parts with json stuff. Alternatively, we can choose to either wire json to the core or just use a generic member like [inline code lost in extraction] |
||
unique_ptr<BaseAccessor> GetAccessor(const DbContext& db_cntx, const PrimeValue& pv) { | ||
DCHECK(pv.ObjType() == OBJ_HASH || pv.ObjType() == OBJ_JSON); | ||
|
||
|
Uh oh!
There was an error while loading. Please reload this page.