Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions langchain-core/src/caches/base.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ import { type StoredGeneration } from "../messages/base.js";
* separate concerns and scale horizontally.
*
* TODO: Make cache key consistent across versions of LangChain.
*
* @deprecated Use `makeDefaultKeyEncoder()` to create a custom key encoder.
* This function will be removed in a future version.
*/
export const getCacheKey = (...strings: string[]): string => {
  // Collapse every part into a single "_"-delimited string, then hash that
  // string with the default (SHA-1 based) `insecureHash`.
  const joinedParts = strings.join("_");
  return insecureHash(joinedParts);
};
Expand Down Expand Up @@ -43,6 +46,21 @@ export function serializeGeneration(generation: Generation) {
* Base class for all caches. All caches should extend this class.
*/
export abstract class BaseCache<T = Generation[]> {
// Function that turns the strings passed by `lookup`/`update` (prompt and
// LLM key) into the cache key.
// For backwards compatibility, we use a default key encoder (`getCacheKey`)
// that uses SHA-1 to hash the prompt and LLM key. This will also print a warning
// about the security implications of using SHA-1 as a cache key.
// Replace it via `makeDefaultKeyEncoder()`.
protected keyEncoder: (...strings: string[]) => string = getCacheKey;

/**
* Sets a custom key encoder function for the cache.
*
* Despite the "make" in its name, this method does not build or return an
* encoder: it stores `keyEncoderFn` on the instance, replacing whatever
* encoder was in use before.
*
* The function should take a prompt and an LLM key and return a string
* that will be used as the cache key.
* @param keyEncoderFn The custom key encoder function.
*/
makeDefaultKeyEncoder(keyEncoderFn: (...strings: string[]) => string): void {
this.keyEncoder = keyEncoderFn;
}

abstract lookup(prompt: string, llmKey: string): Promise<T | null>;

abstract update(prompt: string, llmKey: string, value: T): Promise<void>;
Expand All @@ -69,7 +87,9 @@ export class InMemoryCache<T = Generation[]> extends BaseCache<T> {
* @returns The data corresponding to the prompt and LLM key, or null if not found.
*/
lookup(prompt: string, llmKey: string): Promise<T | null> {
  // Derive the key through the configurable encoder only. The previous
  // hard-coded `getCacheKey(...)` return ran first and made this path
  // unreachable, so a custom encoder was never consulted on reads.
  const cacheKey = this.keyEncoder(prompt, llmKey);
  // Normalise a miss (`undefined` from the Map) to `null`.
  return Promise.resolve(this.cache.get(cacheKey) ?? null);
}

/**
Expand All @@ -79,7 +99,7 @@ export class InMemoryCache<T = Generation[]> extends BaseCache<T> {
* @param value The data to be stored.
*/
async update(prompt: string, llmKey: string, value: T): Promise<void> {
  // Store the value once, under the key produced by the configurable
  // encoder. Writing it under the legacy `getCacheKey(...)` key as well
  // would run the SHA-1 hash even when a custom encoder is configured and
  // would leave a second entry in the map.
  const cacheKey = this.keyEncoder(prompt, llmKey);
  this.cache.set(cacheKey, value);
}

/**
Expand Down
36 changes: 36 additions & 0 deletions langchain-core/src/caches/tests/in_memory_cache.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,3 +39,39 @@ test("InMemoryCache works with complex message types", async () => {
text: "text1",
});
});

test("InMemoryCache handles default key encoder", async () => {
  const cache = new InMemoryCache();

  await cache.update("prompt1", "key1", [
    {
      text: "text1",
    },
  ]);

  // The default encoder hashes with SHA-1 and may emit its one-time
  // console.warn here (it is only logged on first use, so it is not asserted).
  const result = await cache.lookup("prompt1", "key1");

  // `lookup` returns `null` on a miss, and `toBeDefined()` passes for `null`,
  // so assert on the stored value itself to make a miss fail the test.
  expect(result).toEqual([{ text: "text1" }]);
});

test("InMemoryCache handles custom key encoder", async () => {
  const cache = new InMemoryCache();

  // Use a trivial, non-hashing encoder and record every key it produces so
  // the test can prove the cache actually routed through it.
  const encodedKeys: string[] = [];
  cache.makeDefaultKeyEncoder((prompt, key) => {
    const encoded = `${prompt}###${key}`;
    encodedKeys.push(encoded);
    return encoded;
  });

  await cache.update("prompt1", "key1", [
    {
      text: "text1",
    },
  ]);

  const result1 = await cache.lookup("prompt1", "key1");

  // The custom encoder must have been consulted with the prompt and LLM key.
  expect(encodedKeys).toContain("prompt1###key1");

  // `lookup` returns `null` on a miss; `toBeDefined()` plus an early `return`
  // would let a miss pass silently, so compare against the stored value.
  expect(result1).toEqual([{ text: "text1" }]);
});
12 changes: 10 additions & 2 deletions langchain-core/src/indexing/base.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,20 @@ export class _HashedDocument implements HashedDocumentInterface {

metadata: Metadata;

// Hash function applied to serialized content before it is mapped to a UUID.
// Defaults to `insecureHash` for backwards compatibility with old code that
// used SHA-1 as a key encoder. The default will also print a warning about
// the security implications of using SHA-1.
private keyEncoder: (...strings: string[]) => string = insecureHash;

constructor(fields: HashedDocumentArgs) {
  // Pull the three document fields out of the argument bag, then copy them
  // onto the instance in the same order as before.
  const { uid, pageContent, metadata } = fields;
  this.uid = uid;
  this.pageContent = pageContent;
  this.metadata = metadata;
}

/**
* Replaces the hash function this document uses when deriving UUIDs.
*
* The function is stored on the instance and is what `_hashStringToUUID`
* and `_hashNestedDictToUUID` call; it receives a single serialized string.
* @param keyEncoderFn The custom key encoder function.
*/
makeDefaultKeyEncoder(keyEncoderFn: (...strings: string[]) => string): void {
this.keyEncoder = keyEncoderFn;
}

calculateHashes(): void {
const forbiddenKeys = ["hash_", "content_hash", "metadata_hash"];

Expand Down Expand Up @@ -110,13 +118,13 @@ export class _HashedDocument implements HashedDocumentInterface {
}

/**
 * Hashes a string with the configured key encoder and maps the result to a
 * UUIDv5 in `UUIDV5_NAMESPACE`.
 * @param inputString The string to hash.
 * @returns The derived UUID string.
 */
private _hashStringToUUID(inputString: string): string {
  // Single declaration, routed through the configurable encoder. The stale
  // `insecureHash(...)` declaration of the same constant is removed because
  // it redeclared the binding and bypassed any custom encoder.
  const hashValue = this.keyEncoder(inputString);
  return uuidv5(hashValue, UUIDV5_NAMESPACE);
}

/**
 * Serializes `data` to JSON with its top-level keys in sorted order, hashes
 * the JSON with the configured key encoder, and maps the result to a UUIDv5
 * in `UUIDV5_NAMESPACE`.
 * @param data The object to hash.
 * @returns The derived UUID string.
 */
private _hashNestedDictToUUID(data: Record<string, unknown>): string {
  // NOTE(review): an array replacer is an allow-list applied at every nesting
  // level, so nested properties whose names are not also top-level keys are
  // omitted from the serialized form. Left unchanged on purpose, since
  // altering the serialization would change every previously computed hash.
  const sortedTopLevelKeys = Object.keys(data).sort();
  const serializedData = JSON.stringify(data, sortedTopLevelKeys);

  // Single declaration, routed through the configurable encoder. The stale
  // `insecureHash(...)` declaration of the same constant is removed because
  // it redeclared the binding and bypassed any custom encoder.
  const hashValue = this.keyEncoder(serializedData);
  return uuidv5(hashValue, UUIDV5_NAMESPACE);
}
}
Expand Down
18 changes: 18 additions & 0 deletions langchain-core/src/utils/js-sha1/hash.ts
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,24 @@ Sha1.prototype.arrayBuffer = function () {
return buffer;
};

// Module-level latch: flipped after the SHA-1 warning has been printed so the
// warning is emitted at most once.
let hasLoggedWarning = false;

/**
 * Hashes `message` with the SHA-1 implementation in this file and returns
 * the hasher's `hex` output. The first call also logs a warning about
 * SHA-1's lack of collision resistance.
 *
 * @deprecated Use `makeDefaultKeyEncoder()` to create a custom key encoder.
 * This function will be removed in a future version.
 */
export const insecureHash = (message) => {
  if (hasLoggedWarning === false) {
    // Assemble the warning from its sentences; joining on a single space
    // yields exactly the same text as the original concatenation.
    const warning = [
      "Using default key encoder: SHA-1 is *not* collision-resistant.",
      "While acceptable for most cache scenarios, a motivated attacker",
      "can craft two different payloads that map to the same cache key.",
      "If that risk matters in your environment, supply a stronger",
      "encoder (e.g. SHA-3) by calling the `makeDefaultKeyEncoder()` method.",
      "If you change the key encoder, consider also creating a new cache,",
      "to avoid (the potential for) collisions with existing keys.",
    ].join(" ");
    console.warn(warning);
    hasLoggedWarning = true;
  }

  const hasher = new Sha1(true).update(message);
  return hasher["hex"]();
};
Loading