Skip to content

Commit 3dc02c5

Browse files
oliverlynchascorbicsarah11918
authored
feat(assets): Use entity-tags to revalidate cached remote images (#12426)
* feat(assets): Store etag to refresh cached images without a full download * Separate loading and revalidating functions * Add changeset * Updates based on requested changes * Wording changes, use stale cache on failure to revalidate * Add If-Modified-Since as cache revalidation method * Update .changeset/red-poems-pay.md Co-authored-by: Sarah Rainsberger <[email protected]> --------- Co-authored-by: Matt Kane <[email protected]> Co-authored-by: Sarah Rainsberger <[email protected]>
1 parent ca3ff15 commit 3dc02c5

File tree

3 files changed

+144
-26
lines changed

3 files changed

+144
-26
lines changed

.changeset/red-poems-pay.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
'astro': minor
3+
---
4+
5+
Improves asset caching of remote images
6+
7+
Astro will now store [entity tags](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/ETag) and the [Last-Modified](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Last-Modified) date for cached remote images and use them to revalidate the cache when it goes stale.

packages/astro/src/assets/build/generate.ts

Lines changed: 76 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -15,18 +15,18 @@ import { getConfiguredImageService } from '../internal.js';
1515
import type { LocalImageService } from '../services/service.js';
1616
import type { AssetsGlobalStaticImagesList, ImageMetadata, ImageTransform } from '../types.js';
1717
import { isESMImportedImage } from '../utils/imageKind.js';
18-
import { type RemoteCacheEntry, loadRemoteImage } from './remote.js';
18+
import { type RemoteCacheEntry, loadRemoteImage, revalidateRemoteImage } from './remote.js';
1919

2020
interface GenerationDataUncached {
21-
cached: false;
21+
cached: 'miss';
2222
weight: {
2323
before: number;
2424
after: number;
2525
};
2626
}
2727

2828
interface GenerationDataCached {
29-
cached: true;
29+
cached: 'revalidated' | 'hit';
3030
}
3131

3232
type GenerationData = GenerationDataUncached | GenerationDataCached;
@@ -43,7 +43,12 @@ type AssetEnv = {
4343
assetsFolder: AstroConfig['build']['assets'];
4444
};
4545

46-
type ImageData = { data: Uint8Array; expires: number };
46+
type ImageData = {
47+
data: Uint8Array;
48+
expires: number;
49+
etag?: string;
50+
lastModified?: string;
51+
};
4752

4853
export async function prepareAssetsGenerationEnv(
4954
pipeline: BuildPipeline,
@@ -135,9 +140,12 @@ export async function generateImagesForPath(
135140
const timeEnd = performance.now();
136141
const timeChange = getTimeStat(timeStart, timeEnd);
137142
const timeIncrease = `(+${timeChange})`;
138-
const statsText = generationData.cached
139-
? `(reused cache entry)`
140-
: `(before: ${generationData.weight.before}kB, after: ${generationData.weight.after}kB)`;
143+
const statsText =
144+
generationData.cached !== 'miss'
145+
? generationData.cached === 'hit'
146+
? `(reused cache entry)`
147+
: `(revalidated cache entry)`
148+
: `(before: ${generationData.weight.before}kB, after: ${generationData.weight.after}kB)`;
141149
const count = `(${env.count.current}/${env.count.total})`;
142150
env.logger.info(
143151
null,
@@ -156,7 +164,7 @@ export async function generateImagesForPath(
156164
const finalFolderURL = new URL('./', finalFileURL);
157165
await fs.promises.mkdir(finalFolderURL, { recursive: true });
158166

159-
// For remote images, instead of saving the image directly, we save a JSON file with the image data and expiration date from the server
167+
// For remote images, instead of saving the image directly, we save a JSON file with the image data, expiration date, etag and last-modified date from the server
160168
const cacheFile = basename(filepath) + (isLocalImage ? '' : '.json');
161169
const cachedFileURL = new URL(cacheFile, env.assetsCacheDir);
162170

@@ -166,7 +174,7 @@ export async function generateImagesForPath(
166174
await fs.promises.copyFile(cachedFileURL, finalFileURL, fs.constants.COPYFILE_FICLONE);
167175

168176
return {
169-
cached: true,
177+
cached: 'hit',
170178
};
171179
} else {
172180
const JSONData = JSON.parse(readFileSync(cachedFileURL, 'utf-8')) as RemoteCacheEntry;
@@ -184,11 +192,43 @@ export async function generateImagesForPath(
184192
await fs.promises.writeFile(finalFileURL, Buffer.from(JSONData.data, 'base64'));
185193

186194
return {
187-
cached: true,
195+
cached: 'hit',
188196
};
189-
} else {
190-
await fs.promises.unlink(cachedFileURL);
191197
}
198+
199+
// Try to revalidate the cache
200+
if (JSONData.etag || JSONData.lastModified) {
201+
try {
202+
const revalidatedData = await revalidateRemoteImage(options.src as string, {
203+
etag: JSONData.etag,
204+
lastModified: JSONData.lastModified,
205+
});
206+
207+
if (revalidatedData.data.length) {
208+
// Image cache was stale, update original image to avoid redownload
209+
originalImage = revalidatedData;
210+
} else {
211+
revalidatedData.data = Buffer.from(JSONData.data, 'base64');
212+
213+
// Freshen cache on disk
214+
await writeRemoteCacheFile(cachedFileURL, revalidatedData, env);
215+
216+
await fs.promises.writeFile(finalFileURL, revalidatedData.data);
217+
return { cached: 'revalidated' };
218+
}
219+
} catch (e) {
220+
// Reuse stale cache if revalidation fails
221+
env.logger.warn(
222+
null,
223+
`An error was encountered while revalidating a cached remote asset. Proceeding with stale cache. ${e}`,
224+
);
225+
226+
await fs.promises.writeFile(finalFileURL, Buffer.from(JSONData.data, 'base64'));
227+
return { cached: 'hit' };
228+
}
229+
}
230+
231+
await fs.promises.unlink(cachedFileURL);
192232
}
193233
} catch (e: any) {
194234
if (e.code !== 'ENOENT') {
@@ -209,6 +249,8 @@ export async function generateImagesForPath(
209249
let resultData: Partial<ImageData> = {
210250
data: undefined,
211251
expires: originalImage.expires,
252+
etag: originalImage.etag,
253+
lastModified: originalImage.lastModified,
212254
};
213255

214256
const imageService = (await getConfiguredImageService()) as LocalImageService;
@@ -239,13 +281,7 @@ export async function generateImagesForPath(
239281
if (isLocalImage) {
240282
await fs.promises.writeFile(cachedFileURL, resultData.data);
241283
} else {
242-
await fs.promises.writeFile(
243-
cachedFileURL,
244-
JSON.stringify({
245-
data: Buffer.from(resultData.data).toString('base64'),
246-
expires: resultData.expires,
247-
}),
248-
);
284+
await writeRemoteCacheFile(cachedFileURL, resultData as ImageData, env);
249285
}
250286
}
251287
} catch (e) {
@@ -259,7 +295,7 @@ export async function generateImagesForPath(
259295
}
260296

261297
return {
262-
cached: false,
298+
cached: 'miss',
263299
weight: {
264300
// Divide by 1024 to get size in kilobytes
265301
before: Math.trunc(originalImage.data.byteLength / 1024),
@@ -269,6 +305,25 @@ export async function generateImagesForPath(
269305
}
270306
}
271307

308+
/**
 * Persist a remote image's cache entry to disk as a JSON document.
 *
 * The entry stores the image bytes base64-encoded under `data`, followed by the
 * `expires`, `etag` and `lastModified` values taken from `resultData`.
 *
 * This is best-effort: any error raised while serializing or writing is logged
 * as a warning through `env.logger` and is not rethrown, so the caller simply
 * continues without a cache entry for this asset.
 *
 * @param cachedFileURL - Location of the JSON cache file to write.
 * @param resultData - Image bytes together with the cache metadata to store.
 * @param env - Asset environment; only its `logger` is used here.
 */
async function writeRemoteCacheFile(cachedFileURL: URL, resultData: ImageData, env: AssetEnv) {
  try {
    const { data, expires, etag, lastModified } = resultData;

    // Binary data cannot live in JSON directly, so it is stored as base64 text.
    const serializedEntry = JSON.stringify({
      data: Buffer.from(data).toString('base64'),
      expires,
      etag,
      lastModified,
    });

    return await fs.promises.writeFile(cachedFileURL, serializedEntry);
  } catch (writeError) {
    env.logger.warn(
      null,
      `An error was encountered while writing the cache file for a remote asset. Proceeding without caching this asset. Error: ${writeError}`,
    );
  }
}
326+
272327
export function getStaticImageList(): AssetsGlobalStaticImagesList {
273328
if (!globalThis?.astroAsset?.staticImages) {
274329
return new Map();
@@ -279,11 +334,7 @@ export function getStaticImageList(): AssetsGlobalStaticImagesList {
279334

280335
async function loadImage(path: string, env: AssetEnv): Promise<ImageData> {
281336
if (isRemotePath(path)) {
282-
const remoteImage = await loadRemoteImage(path);
283-
return {
284-
data: remoteImage.data,
285-
expires: remoteImage.expires,
286-
};
337+
return await loadRemoteImage(path);
287338
}
288339

289340
return {

packages/astro/src/assets/build/remote.ts

Lines changed: 61 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
import CachePolicy from 'http-cache-semantics';
22

3-
export type RemoteCacheEntry = { data: string; expires: number };
3+
export type RemoteCacheEntry = {
4+
data: string;
5+
expires: number;
6+
etag?: string;
7+
lastModified?: string;
8+
};
49

510
export async function loadRemoteImage(src: string) {
611
const req = new Request(src);
@@ -19,6 +24,61 @@ export async function loadRemoteImage(src: string) {
1924
return {
2025
data: Buffer.from(await res.arrayBuffer()),
2126
expires: Date.now() + expires,
27+
etag: res.headers.get('Etag') ?? undefined,
28+
lastModified: res.headers.get('Last-Modified') ?? undefined,
29+
};
30+
}
31+
32+
/**
 * Ask the remote server whether a cached remote asset is still current.
 *
 * A conditional request is sent using
 * [If-None-Match](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-None-Match)
 * (when an entity-tag is stored) and/or
 * [If-Modified-Since](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/If-Modified-Since)
 * (when a last-modified date is stored). The server may answer
 * 304 Not Modified, or 200 OK with an updated asset.
 *
 * @param src - URL of the remote asset.
 * @param revalidationData - The stored entity-tag and/or last-modified value of the cached asset.
 * @returns The response body as a buffer, a new expiry timestamp, and the asset's
 * `etag` and `lastModified` values. Callers treat an empty `data` buffer as
 * "not modified". If a 2xx response arrives with an empty body, the result of a
 * full `loadRemoteImage(src)` download is returned instead.
 * @throws Error when the response is neither successful (2xx) nor 304.
 */
export async function revalidateRemoteImage(
  src: string,
  revalidationData: { etag?: string; lastModified?: string },
) {
  // Only send the validators that are actually stored.
  const conditionalHeaders: { 'If-None-Match'?: string; 'If-Modified-Since'?: string } = {};
  if (revalidationData.etag) {
    conditionalHeaders['If-None-Match'] = revalidationData.etag;
  }
  if (revalidationData.lastModified) {
    conditionalHeaders['If-Modified-Since'] = revalidationData.lastModified;
  }

  const req = new Request(src, { headers: conditionalHeaders });
  const res = await fetch(req);

  // Asset not modified: https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/304
  if (!(res.ok || res.status === 304)) {
    throw new Error(
      `Failed to revalidate cached remote image ${src}. The request did not return a 200 OK / 304 NOT MODIFIED response. (received ${res.status} ${res.statusText})`,
    );
  }

  const data = Buffer.from(await res.arrayBuffer());

  if (res.ok && data.length === 0) {
    // Server indicated the cache was stale but sent no body: fall back to a full download.
    return await loadRemoteImage(src);
  }

  // Calculate an expiration date based on the response's TTL.
  const policyRequest = webToCachePolicyRequest(req);
  // 304 responses themselves are not cacheable, so present the same headers as a
  // 200 response purely to obtain the refreshed TTL.
  const policyResponse = webToCachePolicyResponse(
    res.ok ? res : new Response(null, { status: 200, headers: res.headers }),
  );
  const policy = new CachePolicy(policyRequest, policyResponse);
  const timeToLive = policy.storable() ? policy.timeToLive() : 0;

  // Servers should repeat the validators on a 304, but if they don't, reuse the
  // stored values. On a 2xx response a missing header means there is no validator.
  const fallbackEtag = res.ok ? undefined : revalidationData.etag;
  const fallbackLastModified = res.ok ? undefined : revalidationData.lastModified;

  return {
    data,
    expires: Date.now() + timeToLive,
    etag: res.headers.get('Etag') ?? fallbackEtag,
    lastModified: res.headers.get('Last-Modified') ?? fallbackLastModified,
  };
}
2484

0 commit comments

Comments
 (0)