/* Any copyright is dedicated to the Public Domain.
http://creativecommons.org/publicdomain/zero/1.0/ */
/// <reference path="../../../../../toolkit/components/translations/tests/browser/shared-head.js" />
"use strict";
/**
* @type {import("../../actors/MLEngineParent.sys.mjs")}
*/
const { MLEngineParent } = ChromeUtils.importESModule(
"resource://gre/actors/MLEngineParent.sys.mjs"
);
const { ModelHub, IndexedDBCache } = ChromeUtils.importESModule(
"chrome://global/content/ml/ModelHub.sys.mjs"
);
const {
createEngine,
PipelineOptions,
QuantizationLevel,
ExecutionPriority,
InferenceDevice,
LogLevel,
} = ChromeUtils.importESModule(
"chrome://global/content/ml/EngineProcess.sys.mjs"
);
// This test suite shares some utility functions with translations as they work in a very
// similar fashion. Eventually, the plan is to unify these two components.
Services.scriptloader.loadSubScript(
"chrome://mochitests/content/browser/toolkit/components/translations/tests/browser/shared-head.js",
this
);
/**
 * Sets up the stage for a test: mocks the ML Remote Settings collections and
 * pushes the preferences used by the ML engine tests.
 *
 * @param {object} [options]
 * @param {boolean} [options.disabled=false]
 *   When true, "browser.ml.enable" is pushed as false.
 * @param {Array<Array>} [options.prefs=[]]
 *   Extra [name, value] preference pairs, appended after the defaults.
 * @param {?Array<object>} [options.records=null]
 *   Forwarded as `records` to createAndMockMLRemoteSettings.
 * @returns {Promise<{remoteClients: object, cleanup: function(): Promise<void>}>}
 *   The mocked clients and a `cleanup` function that undoes this setup.
 */
async function setup({ disabled = false, prefs = [], records = null } = {}) {
  const { removeMocks, remoteClients } = await createAndMockMLRemoteSettings({
    autoDownloadFromRemoteSettings: false,
    records,
  });

  await SpecialPowers.pushPrefEnv({
    set: [
      // Enabled by default.
      ["browser.ml.enable", !disabled],
      ["browser.ml.logLevel", "All"],
      ["browser.ml.modelCacheTimeout", 1000],
      ["browser.ml.checkForMemory", false],
      ["browser.ml.queueWaitTimeout", 2],
      ["javascript.options.wasm_lazy_tiering", true],
      // Caller-provided prefs come last.
      ...prefs,
    ],
  });

  return {
    remoteClients,
    /**
     * Removes the mocks, waits for every engine to be reported as terminated
     * and then pops the preference environment pushed above.
     */
    async cleanup() {
      await removeMocks();
      await waitForCondition(
        () => EngineProcess.areAllEnginesTerminated(),
        "Waiting for all of the engines to be terminated.",
        100,
        200
      );
      await SpecialPowers.popPrefEnv();
    },
  };
}
/**
 * Builds the default wasm runtime record list from the file name and major
 * version exposed by MLEngineParent.
 *
 * @returns {Array<{name: string, version: string}>}
 *   A fresh single-element array; the version is the major version followed
 *   by ".0".
 */
function getDefaultWasmRecords() {
  const { WASM_FILENAME, WASM_MAJOR_VERSION } = MLEngineParent;
  const wasmRecord = {
    name: WASM_FILENAME,
    version: `${WASM_MAJOR_VERSION}.0`,
  };
  return [wasmRecord];
}
/**
 * Creates local Remote Settings clients for the three ML collections (wasm
 * runtime, inference options, allow/deny list) and installs them on
 * MLEngineParent through `mockRemoteSettings`.
 *
 * @param {object} [options]
 * @param {boolean} [options.autoDownloadFromRemoteSettings=false]
 *   Forwarded to createRemoteClient for the wasm runtime client.
 * @param {?Array<object>} [options.records=null]
 *   Records for the inference options collection. When falsy, a single
 *   default "moz-echo" record is used.
 * @returns {Promise<{removeMocks: function(): Promise<void>, remoteClients: object}>}
 *   `remoteClients` is keyed by the real collection names; `removeMocks`
 *   clears the data created here and removes the mocks from MLEngineParent.
 */
async function createAndMockMLRemoteSettings({
  autoDownloadFromRemoteSettings = false,
  records = null,
} = {}) {
  const wasmRecords = getDefaultWasmRecords().map(({ name, version }) => ({
    id: crypto.randomUUID(),
    name,
    version,
    last_modified: Date.now(),
    schema: Date.now(),
  }));

  // The runtime client is created with an attachment mock, so the value
  // returned here wraps the actual client (exposed as `runtime.client`).
  const runtime = await createRemoteClient({
    collectionName: "test-translation-wasm",
    records: wasmRecords,
    attachmentMock: true,
    autoDownloadFromRemoteSettings,
  });

  const defaultOptionRecords = [
    {
      taskName: "moz-echo",
      modelId: "mozilla/distilvit",
      processorId: "mozilla/distilvit",
      tokenizerId: "mozilla/distilvit",
      modelRevision: "main",
      processorRevision: "main",
      tokenizerRevision: "main",
      dtype: "q8",
      id: "74a71cfd-1734-44e6-85c0-69cf3e874138",
    },
  ];
  const options = await createRemoteClient({
    records: records || defaultOptionRecords,
    collectionName: "test-ml-inference-options",
  });

  const allowDeny = await createRemoteClient({
    records: [
      {
        filter: "ALLOW",
        urlPrefix: "https://",
        id: "74a71cfd-1734-44e6-85c0-69cf3e874138",
      },
    ],
    collectionName: "test-ml-allow-deny-list",
  });

  // Callers get the attachment-mock wrapper for the runtime collection ...
  const remoteClients = {
    "ml-onnx-runtime": runtime,
    "ml-inference-options": options,
    "ml-model-allow-deny-list": allowDeny,
  };

  // ... while MLEngineParent is handed the underlying client.
  MLEngineParent.mockRemoteSettings({
    "ml-onnx-runtime": runtime.client,
    "ml-inference-options": options,
    "ml-model-allow-deny-list": allowDeny,
  });

  return {
    async removeMocks() {
      await runtime.client.attachments.deleteAll();
      await runtime.client.db.clear();
      await options.db.clear();
      await allowDeny.db.clear();
      MLEngineParent.removeMocks();
    },
    remoteClients,
  };
}
/**
 * Creates a local RemoteSettingsClient for use within tests.
 *
 * The client name is `collectionName` suffixed with a running counter
 * (`_remoteSettingsMockId`), its local database is cleared and then filled
 * with `records`.
 *
 * NOTE(review): `_remoteSettingsMockId` and `createAttachmentMock` are not
 * defined in this file; presumably they come from the translations
 * shared-head.js loaded above - confirm.
 *
 * @param {object} options
 * @param {Array<object>} options.records - Records imported into the client database.
 * @param {string} options.collectionName - Base name of the collection.
 * @param {boolean} [options.attachmentMock=false]
 *   When true, the client is wrapped with createAttachmentMock and the wrapper
 *   is returned instead of the client.
 * @param {boolean} [options.autoDownloadFromRemoteSettings=false]
 *   Forwarded to createAttachmentMock.
 * @returns {Promise<RemoteSettings|AttachmentMock>}
 */
async function createRemoteClient({
  records,
  collectionName,
  attachmentMock = false,
  autoDownloadFromRemoteSettings = false,
}) {
  const { RemoteSettings } = ChromeUtils.importESModule(
    "resource://services-settings/remote-settings.sys.mjs"
  );
  const client = RemoteSettings(`${collectionName}-${_remoteSettingsMockId++}`);
  await client.db.clear();
  await client.db.importChanges({}, Date.now(), records);

  if (attachmentMock) {
    return createAttachmentMock(
      client,
      collectionName,
      autoDownloadFromRemoteSettings
    );
  }
  return client;
}
/*
 * Perftest related
 */

// Number of bytes in one mebibyte.
const MB_TO_BYTES = 1024 ** 2;

// Default number of measured iterations of a perf test.
const ITERATIONS = 10;

// Names of the timing marks looked up in the metrics returned by an engine run.
const PIPELINE_READY_START = "ensurePipelineIsReadyStart";
const PIPELINE_READY_END = "ensurePipelineIsReadyEnd";
const INIT_START = "initializationStart";
const INIT_END = "initializationEnd";
const RUN_START = "runStart";
const RUN_END = "runEnd";

// Keys read from a single metric entry.
const WHEN = "when";
const MEMORY = "memory";

// Names under which the derived results are reported.
const PIPELINE_READY_LATENCY = "pipeline-ready-latency";
const INITIALIZATION_LATENCY = "initialization-latency";
const MODEL_RUN_LATENCY = "model-run-latency";
const TOTAL_MEMORY_USAGE = "total-memory-usage";

// Prefix added to the reported names for the first (cold start) run.
const COLD_START_PREFIX = "cold-start-";

// Formats a number for the textual report using at most 4 significant digits.
const { format: formatNumber } = new Intl.NumberFormat("en-US", {
  maximumSignificantDigits: 4,
});
/**
 * Computes the median of a list of numbers. The input is not modified.
 *
 * @param {Iterable<number>} arr - The values to inspect.
 * @returns {number} The middle value for an odd count, the mean of the two
 *   middle values for an even count, and NaN for an empty list.
 */
function median(arr) {
  const sorted = [...arr];
  sorted.sort((left, right) => left - right);

  const count = sorted.length;
  const upperMiddle = Math.floor(count / 2);
  const isOddCount = count % 2 === 1;

  return isOddCount
    ? sorted[upperMiddle]
    : (sorted[upperMiddle - 1] + sorted[upperMiddle]) / 2;
}
/**
 * Renders a list of numbers as one line of right-aligned columns.
 *
 * Every value is formatted with `formatNumber`, left-padded to 7 characters
 * and guaranteed to start with a space so adjacent columns never touch.
 *
 * @param {Array<number>} arr - The values to render.
 * @returns {string} The concatenated columns ("" for an empty list).
 */
function stringify(arr) {
  const toColumn = value => {
    const padded = formatNumber(value).padStart(7, " ");
    // A value that fills the whole column still needs a separator.
    return padded[0] !== " " ? " " + padded : padded;
  };
  return arr.map(value => toColumn(value)).join("");
}
/**
 * Prints a table of the collected samples and reports the median of every
 * metric.
 *
 * The table (one line per metric: name, samples, median) is written with
 * `dump`; the medians are logged through `info` as a "perfMetrics | <json>"
 * line.
 *
 * @param {Object<string, Array<number>>} journal - Samples keyed by metric name.
 */
function reportMetrics(journal) {
  const medians = {};
  const labelWidth =
    1 + Math.max(...Object.keys(journal).map(metricName => metricName.length));
  const chunks = ["\nResults (ms)\n"];

  for (const metricName in journal) {
    const samples = journal[metricName];
    const middle = median(samples);
    const label = `${metricName}:`.padEnd(labelWidth, " ");
    chunks.push(label + stringify(samples));
    chunks.push(` median ${formatNumber(middle)}\n`);
    medians[metricName] = middle;
  }

  dump(chunks.join(""));
  info(`perfMetrics | ${JSON.stringify(medians)}`);
}
/**
 * Looks up the most recent metric entry with the given name and returns the
 * value stored under `key`. When several entries share the name, the last one
 * in the array wins.
 *
 * @param {Array<object>} metrics - The array of metric objects to search through.
 * @param {string} name - The name of the metric to find.
 * @param {string} key - The key within the metric object whose value should be returned.
 * @returns {*} - The value of `key` in the latest matching metric, or undefined
 *   when no entry has that name.
 */
function fetchMLMetric(metrics, name, key) {
  const latestMatch = metrics.filter(entry => entry.name === name).pop();
  // `pop` yields undefined when nothing matched.
  return latestMatch === undefined ? undefined : latestMatch[key];
}
/**
 * Derives the three latencies (pipeline ready, initialization, model run) from
 * the start/end timing marks found in `metrics`.
 *
 * Each latency is the `when` value of the end mark minus the `when` value of
 * the start mark, so a missing mark yields NaN.
 *
 * @param {Array<object>} metrics - Metric entries as accepted by fetchMLMetric.
 * @param {boolean} isFirstRun - When truthy, the result keys carry the cold start prefix.
 * @returns {Object<string, number>} The latencies keyed by their reported name.
 */
function fetchLatencyMetrics(metrics, isFirstRun) {
  const elapsedBetween = (startMark, endMark) =>
    fetchMLMetric(metrics, endMark, WHEN) -
    fetchMLMetric(metrics, startMark, WHEN);

  const pipelineLatency = elapsedBetween(PIPELINE_READY_START, PIPELINE_READY_END);
  const initLatency = elapsedBetween(INIT_START, INIT_END);
  const runLatency = elapsedBetween(RUN_START, RUN_END);

  const keyPrefix = isFirstRun ? COLD_START_PREFIX : "";
  return {
    [keyPrefix + PIPELINE_READY_LATENCY]: pipelineLatency,
    [keyPrefix + INITIALIZATION_LATENCY]: initLatency,
    [keyPrefix + MODEL_RUN_LATENCY]: runLatency,
  };
}
/**
 * Collects every metric derived from the raw engine metrics. Currently this
 * is a shallow copy of the latency metrics.
 *
 * @param {Array<object>} metrics - Raw metric entries of one engine run.
 * @param {boolean} isFirstRun - Forwarded to fetchLatencyMetrics.
 * @returns {Object<string, number>} A new object holding the derived metrics.
 */
function fetchMetrics(metrics, isFirstRun) {
  const latencyMetrics = fetchLatencyMetrics(metrics, isFirstRun);
  return { ...latencyMetrics };
}
/**
 * Prepares the perf test environment and creates an engine for the given
 * pipeline options.
 *
 * Reads the "MOZ_FETCHES_DIR" and "MOZ_MODELS_HUB" environment variables, runs
 * perfSetup with "browser.ml.modelHubRootUrl" set to the hub URL and asks the
 * ML engine parent for an engine.
 *
 * If the engine cannot be created after perfSetup succeeded, the environment
 * is torn down (engine destroyed, then `cleanup`) before the original error is
 * rethrown, since the caller never receives `cleanup` in that case.
 *
 * @param {PipelineOptions} pipelineOptions - Options passed to `getEngine`.
 * @returns {Promise<{cleanup: function(): Promise<void>, engine: object}>}
 * @throws {Error} When "MOZ_MODELS_HUB" is not set, or when setup or engine
 *   creation fails.
 */
async function initializeEngine(pipelineOptions) {
  const modelDirectory = normalizePathForOS(
    `${Services.env.get("MOZ_FETCHES_DIR")}/onnx-models`
  );
  info(`Model Directory: ${modelDirectory}`);

  const modelHubRootUrl = Services.env.get("MOZ_MODELS_HUB");
  if (!modelHubRootUrl) {
    throw new Error(
      "MOZ_MODELS_HUB is not set, you need to run with --hooks toolkit/components/ml/tests/tools/hook_local_hub.py"
    );
  }
  info(`ModelHubRootUrl: ${modelHubRootUrl}`);

  const { cleanup } = await perfSetup({
    prefs: [["browser.ml.modelHubRootUrl", modelHubRootUrl]],
  });

  try {
    info("Get the engine process");
    const mlEngineParent = await EngineProcess.getMLEngineParent();

    info("Get Pipeline Options");
    info("Run the inference");
    return {
      cleanup,
      engine: await mlEngineParent.getEngine(pipelineOptions),
    };
  } catch (error) {
    // Same teardown order as runInference: destroy the engine, then clean up.
    // A teardown failure is only logged so it cannot mask the original error.
    try {
      await EngineProcess.destroyMLEngine();
      await cleanup();
    } catch (teardownError) {
      info(`Teardown after failed engine initialization failed: ${teardownError}`);
    }
    throw error;
  }
}
/**
 * Converts the separators of a path to the style of the current platform,
 * as reported by `Services.appinfo.OS`.
 *
 * @param {string} path - The path to normalize.
 * @returns {string} The path using backslashes on Windows ("WINNT") and
 *   forward slashes everywhere else.
 */
function normalizePathForOS(path) {
  if (Services.appinfo.OS === "WINNT") {
    // On Windows, replace forward slashes with backslashes
    return path.replace(/\//g, "\\");
  }
  // On Unix-like systems, replace backslashes with forward slashes
  return path.replace(/\\/g, "/");
}
/**
 * Sets up the stage for a perf test.
 *
 * Mocks the ML Remote Settings collections, pushes the perf test preferences
 * and replaces the attachment download of the "ml-onnx-runtime" client with a
 * function that reads the wasm file from the directory named by the
 * "MOZ_FETCHES_DIR" environment variable.
 *
 * The directory is validated before any mock or preference is installed so
 * that a misconfigured environment does not leave global state behind.
 *
 * @param {object} [options]
 * @param {boolean} [options.disabled=false]
 *   When true, "browser.ml.enable" is pushed as false.
 * @param {Array<Array>} [options.prefs=[]]
 *   Extra [name, value] preference pairs, appended after the defaults.
 * @returns {Promise<{remoteClients: object, cleanup: function(): Promise<void>}>}
 * @throws {Error} When "MOZ_FETCHES_DIR" is empty or is not an absolute path.
 */
async function perfSetup({ disabled = false, prefs = [] } = {}) {
  const artifactDirectory = normalizePathForOS(
    String(Services.env.get("MOZ_FETCHES_DIR"))
  );

  // Stop immediately if this fails.
  if (!artifactDirectory) {
    throw new Error(
      "The wasm artifact directory is not set. This usually happens when running locally. " +
        "Please download all the files from taskcluster/kinds/fetch/onnxruntime-web-fetch.yml. " +
        "Place them in a directory and rerun the test with the environment variable 'MOZ_FETCHES_DIR' " +
        "set such that all the files are directly inside 'MOZ_FETCHES_DIR'"
    );
  }

  if (!PathUtils.isAbsolute(artifactDirectory)) {
    throw new Error(
      "Please provide an absolute path for 'MOZ_FETCHES_DIR' and not a relative path"
    );
  }

  const { removeMocks, remoteClients } = await createAndMockMLRemoteSettings({
    autoDownloadFromRemoteSettings: false,
  });

  await SpecialPowers.pushPrefEnv({
    set: [
      // Enabled by default.
      ["browser.ml.enable", !disabled],
      ["browser.ml.logLevel", "Error"],
      ["browser.ml.modelCacheTimeout", 1000],
      ["browser.ml.checkForMemory", false],
      ["javascript.options.wasm_lazy_tiering", true],
      ...prefs,
    ],
  });

  // Best-effort existence check: any failure to check is treated as "missing".
  async function pathExists(path) {
    try {
      return await IOUtils.exists(path);
    } catch (e) {
      return false;
    }
  }

  // Stand-in for the attachment download: serves the record from disk.
  async function download(record) {
    const recordPath = normalizePathForOS(
      `${artifactDirectory}/${record.name}`
    );

    // Stop immediately if this fails.
    if (!(await pathExists(recordPath))) {
      throw new Error(
        `The wasm file <${recordPath}> does not exist. This usually happens when running locally. ` +
          "Please download all the files from taskcluster/kinds/fetch/onnxruntime-web-fetch.yml. " +
          `Place them in the directory <${artifactDirectory}> ` +
          `such that <${recordPath}> exists.`
      );
    }

    return {
      buffer: (await IOUtils.read(recordPath)).buffer,
    };
  }

  remoteClients["ml-onnx-runtime"].client.attachments.download = download;

  return {
    remoteClients,
    /**
     * Removes the mocks, waits for every engine to be reported as terminated
     * and then pops the preference environment pushed above.
     */
    async cleanup() {
      await removeMocks();
      await waitForCondition(
        () => EngineProcess.areAllEnginesTerminated(),
        "Waiting for all of the engines to be terminated.",
        100,
        200
      );
      await SpecialPowers.popPrefEnv();
    },
  };
}
/**
 * Returns the total memory usage in MiB for the inference process.
 *
 * Sums the amounts of all memory reports whose process name starts with
 * "inference" and whose path starts with "explicit", then rounds the total to
 * whole MiB.
 *
 * @returns {Promise<number>}
 */
async function getTotalMemoryUsage() {
  const reporterManager = Cc[
    "@mozilla.org/memory-reporter-manager;1"
  ].getService(Ci.nsIMemoryReporterManager);

  let explicitBytes = 0;
  const onReport = (process, path, _kind, _units, amount, _description) => {
    const isExplicitInference =
      process.startsWith("inference") && path.startsWith("explicit");
    if (isExplicitInference) {
      explicitBytes += amount;
    }
  };

  // The promise settles once the reporter manager signals completion.
  await new Promise(resolve => {
    reporterManager.getReportsExtended(
      onReport,
      null,
      resolve,
      null,
      /* anonymized = */ false,
      /* minimizeMemoryUsage = */ true,
      null
    );
  });

  const bytesPerMiB = 1024 * 1024;
  return Math.round(explicitBytes / bytesPerMiB);
}
/**
 * Runs one inference with a freshly initialized engine and collects its
 * metrics.
 *
 * The engine is destroyed and the environment cleaned up afterwards, whether
 * or not the run succeeded.
 *
 * @param {PipelineOptions} pipelineOptions - Options used to create the engine.
 * @param {object} request - The request handed to `engine.run`.
 * @param {boolean} [isFirstRun=false] - When true, metric names carry the cold start prefix.
 * @returns {Promise<Object<string, number>>} The latency metrics plus the total memory usage.
 */
async function runInference(pipelineOptions, request, isFirstRun = false) {
  const { cleanup, engine } = await initializeEngine(pipelineOptions);

  try {
    const { metrics: rawMetrics } = await engine.run(request);
    const collected = fetchMetrics(rawMetrics, isFirstRun);

    const memoryKey = `${isFirstRun ? COLD_START_PREFIX : ""}${TOTAL_MEMORY_USAGE}`;
    collected[memoryKey] = await getTotalMemoryUsage();

    return collected;
  } finally {
    await EngineProcess.destroyMLEngine();
    await cleanup();
  }
}
/**
 * Runs a performance test for the given name, options, and arguments and
 * reports the results for perfherder.
 *
 * Every iteration runs one full inference (engine creation included). Invalid
 * samples - null, undefined, NaN (produced when a timing mark is missing) or
 * negative values - are recorded as 0.
 *
 * @param {string} name - Test name; upper-cased and used as metric name prefix.
 * @param {object} options - Passed to the PipelineOptions constructor.
 * @param {object} request - The request handed to every inference run.
 * @param {number} [iterations=ITERATIONS] - Number of measured runs.
 * @param {boolean} [addColdStart=false]
 *   When true, one extra first run is executed and reported under the cold
 *   start metric names.
 * @returns {Promise<void>}
 */
async function perfTest(
  name,
  options,
  request,
  iterations = ITERATIONS,
  addColdStart = false
) {
  const metricPrefix = name.toUpperCase();

  const warmMetricNames = [
    PIPELINE_READY_LATENCY,
    INITIALIZATION_LATENCY,
    MODEL_RUN_LATENCY,
    TOTAL_MEMORY_USAGE,
  ];
  const metricNames = addColdStart
    ? [
        ...warmMetricNames,
        ...warmMetricNames.map(metric => `${COLD_START_PREFIX}${metric}`),
      ]
    : warmMetricNames;

  const journal = {};
  for (const metric of metricNames) {
    journal[`${metricPrefix}-${metric}`] = [];
  }

  const pipelineOptions = new PipelineOptions(options);
  const nIterations = addColdStart ? iterations + 1 : iterations;

  for (let i = 0; i < nIterations; i++) {
    // Only the very first run counts as the cold start.
    const shouldAddColdStart = addColdStart && i === 0;
    const metrics = await runInference(
      pipelineOptions,
      request,
      shouldAddColdStart
    );

    for (const [metricName, metricVal] of Object.entries(metrics)) {
      const isInvalid =
        metricVal === null ||
        metricVal === undefined ||
        Number.isNaN(metricVal) ||
        metricVal < 0;
      journal[`${metricPrefix}-${metricName}`].push(isInvalid ? 0 : metricVal);
    }
  }

  Assert.ok(true);
  reportMetrics(journal);
}