Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 21 additions & 2 deletions langchain-core/src/caches/base.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@ import { type StoredGeneration } from "../messages/base.js";
* separate concerns and scale horizontally.
*
* TODO: Make cache key consistent across versions of LangChain.
*
* @deprecated Supply a custom key encoder via `makeDefaultKeyEncoder()` instead.
* This function will be removed in a future version.
*/
export const getCacheKey = (...strings: string[]): string => {
  // Join every part with "_" and hash the combined string into one key.
  const joined = strings.join("_");
  return insecureHash(joined);
};
Expand Down Expand Up @@ -43,6 +46,22 @@ export function serializeGeneration(generation: Generation) {
* Base class for all caches. All caches should extend this class.
*/
export abstract class BaseCache<T = Generation[]> {

// For backwards compatibility, we use a default key encoder
// that uses SHA-1 to hash the prompt and LLM key. This will also print a warning
// about the security implications of using SHA-1 as a cache key.
protected keyEncoder: (...strings: string[]) => string = getCacheKey;

/**
 * Replaces the function this cache uses to turn its inputs into a cache key.
 *
 * The encoder receives string arguments and must return the string to use as
 * the cache key. `InMemoryCache` calls it as `keyEncoder(prompt, llmKey)`.
 * Until this method is called, `keyEncoder` is `getCacheKey`.
 *
 * This method only swaps the encoder; it does not touch entries that were
 * stored under keys produced by the previous encoder.
 *
 * @param keyEncoderFn The custom key encoder function.
 */
makeDefaultKeyEncoder(keyEncoderFn: (...strings: string[]) => string): void {
this.keyEncoder = keyEncoderFn;
}

abstract lookup(prompt: string, llmKey: string): Promise<T | null>;

abstract update(prompt: string, llmKey: string, value: T): Promise<void>;
Expand All @@ -69,7 +88,7 @@ export class InMemoryCache<T = Generation[]> extends BaseCache<T> {
* @returns The data corresponding to the prompt and LLM key, or null if not found.
*/
lookup(prompt: string, llmKey: string): Promise<T | null> {
return Promise.resolve(this.cache.get(getCacheKey(prompt, llmKey)) ?? null);
return Promise.resolve(this.cache.get(this.keyEncoder(prompt, llmKey)) ?? null);
}

/**
Expand All @@ -79,7 +98,7 @@ export class InMemoryCache<T = Generation[]> extends BaseCache<T> {
* @param value The data to be stored.
*/
async update(prompt: string, llmKey: string, value: T): Promise<void> {
this.cache.set(getCacheKey(prompt, llmKey), value);
this.cache.set(this.keyEncoder(prompt, llmKey), value);
}

/**
Expand Down
51 changes: 50 additions & 1 deletion langchain-core/src/caches/tests/in_memory_cache.test.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { test, expect } from "@jest/globals";
import { test, expect, jest } from "@jest/globals";
import { MessageContentComplex } from "../../messages/base.js";
import { InMemoryCache } from "../base.js";

Expand Down Expand Up @@ -39,3 +39,52 @@ test("InMemoryCache works with complex message types", async () => {
text: "text1",
});
});

// Verifies that a cache using the built-in key encoder round-trips a value
// and emits the SHA-1 warning through console.warn.
test("InMemoryCache handles default key encoder", async () => {
  const cache = new InMemoryCache();
  // Silence the warning while still recording that it was emitted.
  const consoleWarnSpy = jest
    .spyOn(console, "warn")
    .mockImplementation(() => {});

  try {
    await cache.update("prompt1", "key1", [
      {
        text: "text1",
      },
    ]);

    // expect this to call console.warn about SHA-1 usage
    const result = await cache.lookup("prompt1", "key1");

    // lookup() resolves to null on a miss, so toBeDefined() would pass
    // even when nothing was found; assert against null explicitly.
    expect(result).not.toBeNull();
    expect(consoleWarnSpy).toHaveBeenCalled();
  } finally {
    // Always restore console.warn, even when an assertion above throws,
    // so the spy cannot leak into other tests.
    consoleWarnSpy.mockRestore();
  }
});


// Verifies that installing a custom key encoder bypasses the default
// encoder (no console.warn) and that values still round-trip.
test("InMemoryCache handles custom key encoder", async () => {
  const cache = new InMemoryCache();
  // Record any console.warn calls without printing them.
  const consoleWarnSpy = jest
    .spyOn(console, "warn")
    .mockImplementation(() => {});

  try {
    // use fancy hashing algorithm to encode the key :)
    cache.makeDefaultKeyEncoder((prompt, key) => `${prompt}###${key}`);

    // expect custom key encoder not to call console.warn
    await cache.update("prompt1", "key1", [
      {
        text: "text1",
      },
    ]);

    expect(consoleWarnSpy).not.toHaveBeenCalled();

    const result1 = await cache.lookup("prompt1", "key1");

    // lookup() resolves to null on a miss, so toBeDefined() would pass
    // vacuously; assert against null and check the payload unconditionally.
    expect(result1).not.toBeNull();
    expect(result1?.[0]?.text).toBe("text1");
  } finally {
    // Always restore console.warn, even when an assertion above throws.
    consoleWarnSpy.mockRestore();
  }
});
14 changes: 12 additions & 2 deletions langchain-core/src/indexing/base.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,22 @@ export class _HashedDocument implements HashedDocumentInterface {

metadata: Metadata;

// Backwards compatibility with old code that used SHA-1 as a key encoder.
// This will also print a warning about the security implications of using SHA-1.
private keyEncoder: (...strings: string[]) => string = insecureHash;

/**
 * Copies the uid, page content and metadata from `fields` onto the instance.
 * @param fields Source values for the new hashed document.
 */
constructor(fields: HashedDocumentArgs) {
  const { uid, pageContent, metadata } = fields;
  this.uid = uid;
  this.pageContent = pageContent;
  this.metadata = metadata;
}

/**
 * Replaces the function used to hash strings before they are converted
 * into UUIDs. Until this is called, `keyEncoder` is `insecureHash`.
 *
 * `_hashStringToUUID` and `_hashNestedDictToUUID` each call the encoder
 * with a single string argument and pass its result to `uuidv5`.
 *
 * @param keyEncoderFn The custom key encoder function.
 */
makeDefaultKeyEncoder(
keyEncoderFn: (...strings: string[]) => string
): void {
this.keyEncoder = keyEncoderFn;
}

calculateHashes(): void {
const forbiddenKeys = ["hash_", "content_hash", "metadata_hash"];

Expand Down Expand Up @@ -110,13 +120,13 @@ export class _HashedDocument implements HashedDocumentInterface {
}

private _hashStringToUUID(inputString: string): string {
const hash_value = insecureHash(inputString);
const hash_value = this.keyEncoder(inputString);
return uuidv5(hash_value, UUIDV5_NAMESPACE);
}

private _hashNestedDictToUUID(data: Record<string, unknown>): string {
const serialized_data = JSON.stringify(data, Object.keys(data).sort());
const hash_value = insecureHash(serialized_data);
const hash_value = this.keyEncoder(serialized_data);
return uuidv5(hash_value, UUIDV5_NAMESPACE);
}
}
Expand Down
13 changes: 13 additions & 0 deletions langchain-core/src/utils/js-sha1/hash.ts
Original file line number Diff line number Diff line change
Expand Up @@ -412,6 +412,19 @@ Sha1.prototype.arrayBuffer = function () {
return buffer;
};

/**
* @deprecated Supply a custom key encoder via `makeDefaultKeyEncoder()` instead.
* This function will be removed in a future version.
*/
export const insecureHash = (message) => {
console.warn(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's just log this once on initial call to avoid flooding the console and add some example code

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add some example code

Can you elaborate on which example code you'd like to be added?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jacoblee93 can we maybe link to some docs?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah right... this is used in multiple places

@hntrl maybe let's make a small docs page with agnostic info here?

"Using default key encoder: SHA-1 is *not* collision-resistant. " +
"While acceptable for most cache scenarios, a motivated attacker " +
"can craft two different payloads that map to the same cache key. " +
"If that risk matters in your environment, supply a stronger " +
"encoder (e.g. SHA-3) by calling the `makeDefaultKeyEncoder()` method. " +
"If you change the key encoder, consider also creating a new cache, " +
"to avoid (the potential for) collisions with existing keys."
);
return new Sha1(true).update(message)["hex"]();
};
Loading