Skip to content
Merged
Show file tree
Hide file tree
Changes from 9 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/red-poems-pay.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'astro': minor
---

Improves the asset caching by allowing stale assets to be revalidated [using entity tags](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/ETag).
98 changes: 73 additions & 25 deletions packages/astro/src/assets/build/generate.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,18 @@ import { getConfiguredImageService } from '../internal.js';
import type { LocalImageService } from '../services/service.js';
import type { AssetsGlobalStaticImagesList, ImageMetadata, ImageTransform } from '../types.js';
import { isESMImportedImage } from '../utils/imageKind.js';
import { type RemoteCacheEntry, loadRemoteImage } from './remote.js';
import { type RemoteCacheEntry, loadRemoteImage, revalidateRemoteImage } from './remote.js';

interface GenerationDataUncached {
cached: false;
cached: 'miss';
weight: {
before: number;
after: number;
};
}

interface GenerationDataCached {
cached: true;
cached: 'revalidated' | 'hit';
}

type GenerationData = GenerationDataUncached | GenerationDataCached;
Expand All @@ -44,7 +44,11 @@ type AssetEnv = {
assetsFolder: AstroConfig['build']['assets'];
};

type ImageData = { data: Uint8Array; expires: number };
type ImageData = {
data: Uint8Array;
expires: number;
etag?: string | null;
};

export async function prepareAssetsGenerationEnv(
pipeline: BuildPipeline,
Expand Down Expand Up @@ -135,9 +139,12 @@ export async function generateImagesForPath(
const timeEnd = performance.now();
const timeChange = getTimeStat(timeStart, timeEnd);
const timeIncrease = `(+${timeChange})`;
const statsText = generationData.cached
? `(reused cache entry)`
: `(before: ${generationData.weight.before}kB, after: ${generationData.weight.after}kB)`;
const statsText =
generationData.cached !== 'miss'
? generationData.cached === 'hit'
? `(reused cache entry)`
: `(revalidated cache entry)`
: `(before: ${generationData.weight.before}kB, after: ${generationData.weight.after}kB)`;
const count = `(${env.count.current}/${env.count.total})`;
env.logger.info(
null,
Expand All @@ -156,7 +163,7 @@ export async function generateImagesForPath(
const finalFolderURL = new URL('./', finalFileURL);
await fs.promises.mkdir(finalFolderURL, { recursive: true });

// For remote images, instead of saving the image directly, we save a JSON file with the image data and expiration date from the server
// For remote images, instead of saving the image directly, we save a JSON file with the image data, expiration date and etag from the server
const cacheFile = basename(filepath) + (isLocalImage ? '' : '.json');
const cachedFileURL = new URL(cacheFile, env.assetsCacheDir);

Expand All @@ -166,7 +173,7 @@ export async function generateImagesForPath(
await fs.promises.copyFile(cachedFileURL, finalFileURL, fs.constants.COPYFILE_FICLONE);

return {
cached: true,
cached: 'hit',
};
} else {
const JSONData = JSON.parse(readFileSync(cachedFileURL, 'utf-8')) as RemoteCacheEntry;
Expand All @@ -184,11 +191,43 @@ export async function generateImagesForPath(
await fs.promises.writeFile(finalFileURL, Buffer.from(JSONData.data, 'base64'));

return {
cached: true,
cached: 'hit',
};
} else {
await fs.promises.unlink(cachedFileURL);
}

// Try to revalidate the cache
if (JSONData.etag) {
try {
const revalidatedData = await revalidateRemoteImage(
options.src as string,
JSONData.etag,
);

if (revalidatedData.data.length) {
// Image cache was stale, update original image to avoid redownload
originalImage = revalidatedData;
} else {
revalidatedData.data = Buffer.from(JSONData.data, 'base64');

// Freshen cache on disk
await writeRemoteCacheFile(cachedFileURL, revalidatedData, env);

await fs.promises.writeFile(finalFileURL, revalidatedData.data);
return { cached: 'revalidated' };
}
} catch (e) {
// Reuse stale cache if revalidation fails
env.logger.warn(
null,
`An error was encountered while revalidating a cached remote asset. Proceeding with stale cache. ${e}`,
);

await fs.promises.writeFile(finalFileURL, Buffer.from(JSONData.data, 'base64'));
return { cached: 'hit' };
}
}

await fs.promises.unlink(cachedFileURL);
}
} catch (e: any) {
if (e.code !== 'ENOENT') {
Expand All @@ -209,6 +248,7 @@ export async function generateImagesForPath(
let resultData: Partial<ImageData> = {
data: undefined,
expires: originalImage.expires,
etag: originalImage.etag,
};

const imageService = (await getConfiguredImageService()) as LocalImageService;
Expand Down Expand Up @@ -239,13 +279,7 @@ export async function generateImagesForPath(
if (isLocalImage) {
await fs.promises.writeFile(cachedFileURL, resultData.data);
} else {
await fs.promises.writeFile(
cachedFileURL,
JSON.stringify({
data: Buffer.from(resultData.data).toString('base64'),
expires: resultData.expires,
}),
);
await writeRemoteCacheFile(cachedFileURL, resultData as ImageData, env);
}
}
} catch (e) {
Expand All @@ -259,7 +293,7 @@ export async function generateImagesForPath(
}

return {
cached: false,
cached: 'miss',
weight: {
// Divide by 1024 to get size in kilobytes
before: Math.trunc(originalImage.data.byteLength / 1024),
Expand All @@ -269,6 +303,24 @@ export async function generateImagesForPath(
}
}

async function writeRemoteCacheFile(cachedFileURL: URL, resultData: ImageData, env: AssetEnv) {
try {
return await fs.promises.writeFile(
cachedFileURL,
JSON.stringify({
data: Buffer.from(resultData.data).toString('base64'),
Copy link
Contributor

@ascorbic ascorbic Nov 21, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I realise that this is what we were already doing, but it does seem a bit wasteful to be saving images as JSON-encoded base64 strings, and this could be a good time to fix it. I think it would be better to store the image in a binary, and then use a separate JSON file for metadata, probably with the same filename alongside it but with something like an appended .json

expires: resultData.expires,
etag: resultData.etag,
}),
);
} catch (e) {
env.logger.warn(
null,
`An error was encountered while writing the cache file for a remote asset. Proceeding without caching this asset. Error: ${e}`,
);
}
}

export function getStaticImageList(): AssetsGlobalStaticImagesList {
if (!globalThis?.astroAsset?.staticImages) {
return new Map();
Expand All @@ -279,11 +331,7 @@ export function getStaticImageList(): AssetsGlobalStaticImagesList {

async function loadImage(path: string, env: AssetEnv): Promise<ImageData> {
if (isRemotePath(path)) {
const remoteImage = await loadRemoteImage(path);
return {
data: remoteImage.data,
expires: remoteImage.expires,
};
Comment on lines -282 to -286
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I have no idea what happened here, but thank you for cleaning it, ha

return await loadRemoteImage(path);
}

return {
Expand Down
45 changes: 44 additions & 1 deletion packages/astro/src/assets/build/remote.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import CachePolicy from 'http-cache-semantics';

export type RemoteCacheEntry = { data: string; expires: number };
export type RemoteCacheEntry = { data: string; expires: number; etag?: string };

export async function loadRemoteImage(src: string) {
const req = new Request(src);
Expand All @@ -19,6 +19,49 @@ export async function loadRemoteImage(src: string) {
return {
data: Buffer.from(await res.arrayBuffer()),
expires: Date.now() + expires,
etag: res.headers.get('Etag'),
};
}

/**
* Revalidate a cached remote asset using its entity-tag.
* Uses the [If-None-Match](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-None-Match) header to check with the remote server if the cached version of a remote asset is still up to date.
* The remote server may respond that the cached asset is still up-to-date if the entity-tag matches (304 Not Modified), or respond with an updated asset (200 OK)
* @param src - url to remote asset
* @param etag - the stored Entity-Tag of the cached asset
* @returns An ImageData object containing the asset data, a new expiry time, and the asset's etag. The data buffer will be empty if the asset was not modified.
*/
export async function revalidateRemoteImage(src: string, etag: string) {
const req = new Request(src, { headers: { 'If-None-Match': etag } });
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be worth also supporting If-Modified-Since, and storing the Last-Modified or Date for image responses that don't include an etag.

const res = await fetch(req);

// Asset not modified: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/304
if (!res.ok && res.status !== 304) {
throw new Error(
`Failed to revalidate cached remote image ${src}. The request did not return a 200 OK / 304 NOT MODIFIED response. (received ${res.status} ${res.statusText})`,
);
}

const data = Buffer.from(await res.arrayBuffer());

if (res.ok && !data.length) {
// Server did not include body but indicated cache was stale
return await loadRemoteImage(src);
}

// calculate an expiration date based on the response's TTL
const policy = new CachePolicy(
webToCachePolicyRequest(req),
webToCachePolicyResponse(
res.ok ? res : new Response(null, { status: 200, headers: res.headers }),
), // 304 responses themselves are not cachable, so just pretend to get the refreshed TTL
);
const expires = policy.storable() ? policy.timeToLive() : 0;

return {
data,
expires: Date.now() + expires,
etag: res.headers.get('Etag'),
};
}

Expand Down
Loading