Labels
ep:WebGPU (ort-web webgpu provider), platform:web (issues related to ONNX Runtime web; typically submitted using template)
Description
Describe the issue
Running the model referenced below (see the steps to reproduce) produces substantially different output with the WebGPU provider (wrong) than with the WASM provider (correct). The difference is large enough to suggest a bug in one of the WebGPU operators.
To confirm this further, I ran inference with the same model using ONNX Runtime in Python: the correct results are reproduced with both the CUDA and the CPU providers.
Unfortunately, I'm at a loss as to how to debug this further.
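For context, here is the kind of head-to-head comparison I have been doing. This is a minimal sketch, not my actual code: the compareProviders helper is illustrative, it assumes the feeds are built exactly as in the repro below, and it uses the logits output name from this model.

async function compareProviders(modelPath, feeds) {
  // Run the same feeds through a fresh session for each execution provider.
  const runWith = async (ep) => {
    const session = await ort.InferenceSession.create(modelPath, {
      executionProviders: [ep],
      graphOptimizationLevel: 'all',
    });
    return session.run(feeds);
  };
  const wasmOut = await runWith('wasm');
  const webgpuOut = await runWith('webgpu');
  // Report the largest element-wise discrepancy in the logits output.
  const a = wasmOut.logits.data;
  const b = webgpuOut.logits.data;
  let maxAbsDiff = 0;
  for (let i = 0; i < a.length; i++) {
    maxAbsDiff = Math.max(maxAbsDiff, Math.abs(a[i] - b[i]));
  }
  console.log('max |wasm - webgpu| over logits:', maxAbsDiff);
}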
To reproduce
- Host the following web page with a minimal example. Note that it uses 1.21.0 from a CDN, but the result doesn't change even with the latest available dev version. The needed files can be downloaded: model and input image.
<html>
<head>
  <script src="https://cdn.jsdelivr.net/npm/[email protected]/dist/ort.min.js"></script>
</head>
<body>
  <h1>Example</h1>
  <div>
    <img id="image" src="turtle3.png" alt="Image preview..." width="1333" height="750" hidden>
    <canvas id="mycanvas" width="1333" height="750"></canvas>
  </div>
</body>
<script>
  async function startInferenceSession(modelPath) {
    const session = await ort.InferenceSession.create(modelPath, {
      executionProviders: [
        // 'wasm',
        'webgpu'
      ],
      graphOptimizationLevel: 'all',
    });
    console.log('Inference session created');
    return session;
  }

  async function runInference(session, input) {
    const feeds = {};
    feeds[session.inputNames[0]] = input;
    const outputData = await session.run(feeds);
    return outputData;
  }

  // Convert canvas ImageData to an NCHW float32 tensor normalized with the
  // usual ImageNet per-channel mean/std.
  async function imageToTensor(imageData) {
    const mean = [0.485, 0.456, 0.406];
    const std = [0.229, 0.224, 0.225];
    const rawTensor = await ort.Tensor.fromImage(imageData);
    const rawData = rawTensor.data;
    const [, C, H, W] = rawTensor.dims;
    const normalizedData = new Float32Array(rawData.length);
    const dataPerChannel = H * W;
    for (let c = 0; c < C; c++) {
      for (let h = 0; h < H; h++) {
        for (let w = 0; w < W; w++) {
          const i = c * dataPerChannel + h * W + w;
          normalizedData[i] = (rawData[i] - mean[c]) / std[c];
        }
      }
    }
    return new ort.Tensor(normalizedData, rawTensor.dims);
  }

  async function testInference(session) {
    console.log('Testing inference');
    const canvas = document.getElementById('mycanvas');
    const ctx = canvas.getContext('2d');
    const imageData = ctx.getImageData(0, 0, canvas.width, canvas.height);
    const tensor = await imageToTensor(imageData);
    console.log('Tensor:', tensor);
    const outData = await runInference(session, tensor);
    const { pred_boxes, logits } = outData;
    console.log('logits (first 10 values):', logits.data.slice(0, 10));
  }

  async function main() {
    const img = document.getElementById('image');
    // Make sure the image is fully decoded before drawing it to the canvas,
    // so the tensor isn't built from a blank canvas.
    await img.decode();
    const canvas = document.getElementById('mycanvas');
    const ctx = canvas.getContext('2d');
    ctx.drawImage(img, 0, 0, 1333, 750);
    const session = await startInferenceSession('./test.onnx');
    await testInference(session);
  }

  main();
</script>
</html>
- With the wasm provider, the first logit values (see the console logs) are:
  [-9.01431655883789, -9.044118881225586, -6.852304458618164, -8.999532699584961, -9.034984588623047, -8.848183631896973, -8.849029541015625, -6.6562042236328125, -8.858226776123047, -8.890541076660156]
  These numbers are correct and match my ground truth.
- With the webgpu provider, the first logit values (see the console logs) are:
  [-9.747684478759766, -9.810420989990234, -8.110831260681152, -9.75857162475586, -9.758389472961426, -9.946432113647461, -9.912216186523438, -8.000043869018555, -9.904242515563965, -9.92009449005127]
  These numbers are quite a bit off.
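One variable I could still try to isolate (a speculative sketch on my end, not something I have verified changes the result) is the graph optimization level, to distinguish a bad optimizer fusion from a bug in a single operator kernel:

// Speculative check: create the webgpu session with graph optimizations
// disabled. 'disabled' is a valid graphOptimizationLevel value in
// onnxruntime-web, alongside 'basic', 'extended', and 'all'.
const session = await ort.InferenceSession.create('./test.onnx', {
  executionProviders: ['webgpu'],
  graphOptimizationLevel: 'disabled',
});

If the webgpu numbers matched wasm with optimizations disabled, that would point at a fused/optimized path rather than a base operator.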
Urgency
This issue is urgent and it's blocking my research, unfortunately.
ONNX Runtime Installation
Released Package
ONNX Runtime Version or Commit ID
1.22.0-dev.20250310-fe7634eb6f
Execution Provider
'webgpu' (WebGPU)