Quelle models.profiles.live.test.ts

Sprache: JAVA

import { type Api, completeSimple, type Model } from "@mariozechner/pi-ai";
import { Type } from "typebox";
import { describe, expect, it } from "vitest";
import { loadConfig } from "../config/config.js";
import { parseLiveCsvFilter } from "../media-generation/live-test-helpers.js";
import { runTasksWithConcurrency } from "../utils/run-with-concurrency.js";
import { resolveOpenClawAgentDir } from "./agent-paths.js";
import {
  collectAnthropicApiKeys,
  isAnthropicBillingError,
  isAnthropicRateLimitError,
} from "./live-auth-keys.js";
import { isModelNotFoundErrorMessage } from "./live-model-errors.js";
import {
  isHighSignalLiveModelRef,
  resolveHighSignalLiveModelLimit,
  selectHighSignalLiveItems,
  shouldExcludeProviderFromDefaultHighSignalLiveSweep,
} from "./live-model-filter.js";
import {
  buildLiveModelFileProbeContext,
  buildLiveModelFileProbeRetryContext,
  buildLiveModelImageProbeContext,
  extractAssistantText,
  fileProbeTextMatches,
  imageProbeTextMatches,
  isLiveModelProbeEnabled,
  LIVE_MODEL_FILE_PROBE_ENV,
  LIVE_MODEL_FILE_PROBE_TOKEN,
  LIVE_MODEL_IMAGE_PROBE_ENV,
  modelSupportsImageInput,
  shouldSkipLiveModelExtraProbes,
  shouldSkipLiveModelFileProbe,
  shouldSkipLiveModelImageProbe,
} from "./live-model-turn-probes.js";
import { createLiveTargetMatcher } from "./live-target-matcher.js";
import { isLiveProfileKeyModeEnabled, isLiveTestEnabled } from "./live-test-helpers.js";
import { getApiKeyForModel, requireApiKey } from "./model-auth.js";
import { shouldSuppressBuiltInModel } from "./model-suppression.js";
import { ensureOpenClawModelsJson } from "./models-config.js";
import {
  isCloudflareOrHtmlErrorPage,
  isRateLimitErrorMessage,
} from "./pi-embedded-helpers/errors.js";
import { discoverAuthStorage, discoverModels } from "./pi-model-discovery.js";

const LIVE = isLiveTestEnabled();
const DIRECT_ENABLED = Boolean(process.env.OPENCLAW_LIVE_MODELS?.trim());
const REQUIRE_PROFILE_KEYS = isLiveProfileKeyModeEnabled();
const LIVE_CREDENTIAL_PRECEDENCE = REQUIRE_PROFILE_KEYS ? "profile-first" : "env-first";
const LIVE_HEARTBEAT_MS = Math.max(1_000, toInt(process.env.OPENCLAW_LIVE_HEARTBEAT_MS, 30_000));
const LIVE_SETUP_TIMEOUT_MS = Math.max(
  1_000,
  toInt(process.env.OPENCLAW_LIVE_SETUP_TIMEOUT_MS, 45_000),
);
const LIVE_TEST_TIMEOUT_MS = Math.max(
  1_000,
  toInt(process.env.OPENCLAW_LIVE_TEST_TIMEOUT_MS, 60 * 60 * 1000),
);
const DEFAULT_LIVE_MODEL_CONCURRENCY = 20;
const LIVE_MODEL_CONCURRENCY = resolveLiveModelConcurrency(
  process.env.OPENCLAW_LIVE_MODEL_CONCURRENCY,
);
const LIVE_MODELS_JSON_TIMEOUT_MS = resolveLiveModelsJsonTimeoutMs(
  process.env.OPENCLAW_LIVE_MODELS_JSON_TIMEOUT_MS,
);
const LIVE_FILE_PROBE_ENABLED = isLiveModelProbeEnabled(process.env, LIVE_MODEL_FILE_PROBE_ENV);
const LIVE_IMAGE_PROBE_ENABLED = isLiveModelProbeEnabled(process.env, LIVE_MODEL_IMAGE_PROBE_ENV);

const describeLive = LIVE ? describe : describe.skip;

function parseCsvFilter(raw?: string): Set<string> | null {
  return parseLiveCsvFilter(raw, { lowercase: false });
}

function parseProviderFilter(raw?: string): Set<string> | null {
  return parseCsvFilter(raw);
}

function parseModelFilter(raw?: string): Set<string> | null {
  return parseCsvFilter(raw);
}

function logProgress(message: string): void {
  process.stderr.write(`[live] ${message}\n`);
}

function formatElapsedSeconds(ms: number): string {
  return `${Math.max(1, Math.round(ms / 1_000))}s`;
}

async function withLiveHeartbeat<T>(operation: Promise<T>, context: string): Promise<T> {
  const startedAt = Date.now();
  let heartbeatCount = 0;
  const timer = setInterval(() => {
    heartbeatCount += 1;
    logProgress(`${context}: still running (${formatElapsedSeconds(Date.now() - startedAt)})`);
  }, LIVE_HEARTBEAT_MS);
  timer.unref?.();
  try {
    return await operation;
  } finally {
    clearInterval(timer);
    if (heartbeatCount > 0) {
      logProgress(`${context}: completed after ${formatElapsedSeconds(Date.now() - startedAt)}`);
    }
  }
}

async function withLiveStageTimeout<T>(
  operation: Promise<T>,
  context: string,
  timeoutMs = LIVE_SETUP_TIMEOUT_MS,
): Promise<T> {
  let hardTimer: ReturnType<typeof setTimeout> | undefined;
  try {
    return await withLiveHeartbeat(
      Promise.race([
        operation,
        new Promise<never>((_, reject) => {
          hardTimer = setTimeout(() => {
            reject(new Error(`${context} timed out after ${timeoutMs}ms`));
          }, timeoutMs);
          hardTimer.unref?.();
        }),
      ]),
      context,
    );
  } finally {
    if (hardTimer) {
      clearTimeout(hardTimer);
    }
  }
}

function formatFailurePreview(
  failures: Array<{ model: string; error: string }>,
  maxItems: number,
): string {
  const limit = Math.max(1, maxItems);
  const lines = failures.slice(0, limit).map((failure, index) => {
    const normalized = failure.error.replace(/\s+/g, " ").trim();
    const clipped = normalized.length > 320 ? `${normalized.slice(0, 317)}...` : normalized;
    return `${index + 1}. ${failure.model}: ${clipped}`;
  });
  const remaining = failures.length - limit;
  if (remaining > 0) {
    lines.push(`... and ${remaining} more`);
  }
  return lines.join("\n");
}

function isGoogleModelNotFoundError(err: unknown): boolean {
  const msg = String(err);
  if (!/not found/i.test(msg)) {
    return false;
  }
  if (/\b404\b/.test(msg)) {
    return true;
  }
  if (/models\/.+ is not found for api version/i.test(msg)) {
    return true;
  }
  if (/"status"\\s*:\\s*"NOT_FOUND"/.test(msg)) {
    return true;
  }
  if (/"code"\\s*:\\s*404/.test(msg)) {
    return true;
  }
  return false;
}

describe("isModelNotFoundErrorMessage", () => {
  it("matches whitespace-separated not found errors", () => {
    expect(isModelNotFoundErrorMessage("404 model not found")).toBe(true);
    expect(isModelNotFoundErrorMessage("model: minimax-text-01 not found")).toBe(true);
  });

  it("still matches underscore and hyphen variants", () => {
    expect(isModelNotFoundErrorMessage("404 model not_found")).toBe(true);
    expect(isModelNotFoundErrorMessage("404 model not-found")).toBe(true);
  });

  it("matches deprecated free model transition messages", () => {
    expect(
      isModelNotFoundErrorMessage(
        "404 The free model has been deprecated. Transition to qwen/qwen3.6-plus for continued paid access.",
      ),
    ).toBe(true);
  });

  it("matches OpenRouter no-endpoints wording", () => {
    expect(
      isModelNotFoundErrorMessage("404 No endpoints found for deepseek/deepseek-r1:free."),
    ).toBe(true);
  });
});

describe("isProviderUnavailableErrorMessage", () => {
  it("matches raw HTML provider error pages from transient upstreams", () => {
    expect(
      isProviderUnavailableErrorMessage(
        "Error: <html><head><title>Service Unavailable</title></head><body>try again</body></html>",
      ),
    ).toBe(true);
  });

  it("matches status-prefixed Cloudflare HTML pages", () => {
    expect(
      isProviderUnavailableErrorMessage(
        "521 <!DOCTYPE html><html><head><title>Web server is down</title></head><body>Cloudflare</body></html>",
      ),
    ).toBe(true);
  });

  it("matches transient upstream 502 errors", () => {
    expect(isProviderUnavailableErrorMessage("502 internal server error")).toBe(true);
    expect(
      isProviderUnavailableErrorMessage("provider returned error: 502 Internal Server Error"),
    ).toBe(true);
  });
});

function isChatGPTUsageLimitErrorMessage(raw: string): boolean {
  const msg = raw.toLowerCase();
  return msg.includes("hit your chatgpt usage limit") && msg.includes("try again in");
}

function isRefreshTokenReused(raw: string): boolean {
  return /refresh_token_reused/i.test(raw);
}

function isAccountIdExtractionError(raw: string): boolean {
  return /failed to extract accountid from token/i.test(raw);
}

function isInstructionsRequiredError(raw: string): boolean {
  return /instructions are required/i.test(raw);
}

function isOpenAiCodexHtmlInterruption(raw: string): boolean {
  const trimmed = raw.trim().replace(/^Error:\s*/i, "");
  return (
    /^(?:<!doctype\s+html\b|<html\b)/i.test(trimmed) &&
    (/<meta\s+name=["']viewport["']/i.test(trimmed) || /<body\b/i.test(trimmed))
  );
}

function isModelTimeoutError(raw: string): boolean {
  return /model call timed out after \d+ms/i.test(raw);
}

function isProviderUnavailableErrorMessage(raw: string): boolean {
  const msg = raw.toLowerCase();
  return (
    isRawHtmlProviderErrorPage(raw) ||
    isCloudflareOrHtmlErrorPage(raw) ||
    msg.includes("no allowed providers are available") ||
    msg.includes("provider unavailable") ||
    msg.includes("upstream provider unavailable") ||
    msg.includes("upstream error from google") ||
    msg.includes("temporarily rate-limited upstream") ||
    msg.includes("unable to access non-serverless model") ||
    msg.includes("create and start a new dedicated endpoint") ||
    msg.includes("no available capacity was found for the model") ||
    (msg.includes("502") && msg.includes("internal server error"))
  );
}

function isRawHtmlProviderErrorPage(raw: string): boolean {
  const normalized = raw
    .trim()
    .replace(/^error:\s*/i, "")
    .trim();
  return /^(?:<!doctype\s+html\b|<html\b)/i.test(normalized) && /<\/html>/i.test(normalized);
}

function isOllamaUnavailableErrorMessage(raw: string): boolean {
  const msg = raw.toLowerCase();
  return (
    msg.includes("ollama could not be reached") ||
    (msg.includes("127.0.0.1:11434") && msg.includes("econnrefused")) ||
    (msg.includes("localhost:11434") && msg.includes("econnrefused"))
  );
}

function isAudioOnlyModelErrorMessage(raw: string): boolean {
  return /requires that either input content or output modality contain audio/i.test(raw);
}

function isUnsupportedReasoningEffortErrorMessage(raw: string): boolean {
  return (
    /does not support parameter reasoningeffort/i.test(raw) ||
    /unsupported value:\s*'low'.*reasoning\.effort.*supported values are:\s*'medium'/i.test(raw)
  );
}

function isUnsupportedThinkingToggleErrorMessage(raw: string): boolean {
  return /does not support parameter [`"]?enable_thinking[`"]?/i.test(raw);
}

function isUnsupportedPlanErrorMessage(raw: string): boolean {
  return /current token plan (?:does )?not support (?:this )?model/i.test(raw);
}

describe("isUnsupportedPlanErrorMessage", () => {
  it("matches provider plan-gated models", () => {
    expect(isUnsupportedPlanErrorMessage("current token plan does not support this model")).toBe(
      true,
    );
    expect(isUnsupportedPlanErrorMessage("your current token plan not support model")).toBe(true);
    expect(isUnsupportedPlanErrorMessage("model not found")).toBe(false);
  });
});

function toInt(value: string | undefined, fallback: number): number {
  const trimmed = value?.trim();
  if (!trimmed) {
    return fallback;
  }
  const parsed = Number.parseInt(trimmed, 10);
  return Number.isFinite(parsed) ? parsed : fallback;
}

function resolveLiveModelConcurrency(raw?: string): number {
  return Math.max(1, toInt(raw, DEFAULT_LIVE_MODEL_CONCURRENCY));
}

describe("resolveLiveModelConcurrency", () => {
  it("defaults direct-model probes to 20-way concurrency", () => {
    expect(resolveLiveModelConcurrency()).toBe(20);
  });

  it("accepts explicit concurrency overrides", () => {
    expect(resolveLiveModelConcurrency("7")).toBe(7);
    expect(resolveLiveModelConcurrency("0")).toBe(1);
  });
});

function resolveLiveModelsJsonTimeoutMs(
  modelsJsonTimeoutRaw?: string,
  setupTimeoutMs = LIVE_SETUP_TIMEOUT_MS,
): number {
  return Math.max(setupTimeoutMs, toInt(modelsJsonTimeoutRaw, 120_000));
}

describe("resolveLiveModelsJsonTimeoutMs", () => {
  it("defaults models.json preparation to a longer setup timeout", () => {
    expect(resolveLiveModelsJsonTimeoutMs(undefined, 45_000)).toBe(120_000);
  });

  it("never goes below the shared live setup timeout", () => {
    expect(resolveLiveModelsJsonTimeoutMs("30000", 45_000)).toBe(45_000);
  });
});

function resolveTestReasoning(
  model: Model<Api>,
): "minimal" | "low" | "medium" | "high" | "xhigh" | undefined {
  if (!model.reasoning) {
    return undefined;
  }
  const id = model.id.toLowerCase();
  if (id.includes("deep-research")) {
    return "medium";
  }
  if (model.provider === "openrouter" && id.startsWith("qwq")) {
    return undefined;
  }
  if (model.provider === "xai" && id.startsWith("grok-4")) {
    return undefined;
  }
  if (model.provider === "openai" || model.provider === "openai-codex") {
    if (id.includes("pro")) {
      return "high";
    }
    return "medium";
  }
  return "low";
}

function resolveLiveSystemPrompt(model: Model<Api>): string | undefined {
  if (model.provider === "openai-codex") {
    return "You are a concise assistant. Follow the user's instruction exactly.";
  }
  return undefined;
}

describe("resolveLiveSystemPrompt", () => {
  it("adds instructions for openai-codex probes", () => {
    expect(
      resolveLiveSystemPrompt({
        provider: "openai-codex",
      } as Model<Api>),
    ).toContain("Follow the user's instruction exactly.");
  });

  it("keeps other providers unchanged", () => {
    expect(
      resolveLiveSystemPrompt({
        provider: "openai",
      } as Model<Api>),
    ).toBeUndefined();
  });

  it("matches OpenAI Codex HTML interruption pages", () => {
    expect(
      isOpenAiCodexHtmlInterruption(
        'Error: <html><head><meta name="viewport" content="width=device-width" /></head><body>Try again</body></html>',
      ),
    ).toBe(true);
    expect(isOpenAiCodexHtmlInterruption("Error: connection reset")).toBe(false);
  });
});

async function completeSimpleWithTimeout<TApi extends Api>(
  model: Model<TApi>,
  context: Parameters<typeof completeSimple<TApi>>[1],
  options: Parameters<typeof completeSimple<TApi>>[2],
  timeoutMs: number,
  progressContext: string,
) {
  const maxTimeoutMs = Math.max(1, timeoutMs);
  const controller = new AbortController();
  const abortTimer = setTimeout(() => {
    controller.abort();
  }, maxTimeoutMs);
  abortTimer.unref?.();
  let hardTimer: ReturnType<typeof setTimeout> | undefined;
  const timeout = new Promise<never>((_, reject) => {
    hardTimer = setTimeout(() => {
      reject(new Error(`model call timed out after ${maxTimeoutMs}ms`));
    }, maxTimeoutMs);
    hardTimer.unref?.();
  });
  try {
    return await withLiveHeartbeat(
      Promise.race([
        completeSimple(model, context, {
          ...options,
          signal: controller.signal,
        }),
        timeout,
      ]),
      progressContext,
    );
  } finally {
    clearTimeout(abortTimer);
    if (hardTimer) {
      clearTimeout(hardTimer);
    }
  }
}

async function completeOkWithRetry(params: {
  model: Model<Api>;
  apiKey: string;
  timeoutMs: number;
  progressLabel: string;
}) {
  const runOnce = async (maxTokens: number) => {
    const res = await completeSimpleWithTimeout(
      params.model,
      {
        systemPrompt: resolveLiveSystemPrompt(params.model),
        messages: [
          {
            role: "user",
            content: "Reply with the word ok.",
            timestamp: Date.now(),
          },
        ],
      },
      {
        apiKey: params.apiKey,
        reasoning: resolveTestReasoning(params.model),
        maxTokens,
      },
      params.timeoutMs,
      `${params.progressLabel}: prompt call (maxTokens=${maxTokens})`,
    );
    const text = res.content
      .filter((block) => block.type === "text")
      .map((block) => block.text.trim())
      .join(" ");
    return { res, text };
  };

  const first = await runOnce(64);
  if (first.text.length > 0) {
    return first;
  }
  // Some providers (for example Moonshot Kimi and MiniMax M2.5) may emit
  // reasoning blocks first and only return text once token budget is higher.
  return await runOnce(256);
}

function isDeepSeekV4Model(model: Pick<Model<Api>, "id" | "provider">): boolean {
  return (
    model.provider === "deepseek" &&
    (model.id === "deepseek-v4-flash" || model.id === "deepseek-v4-pro")
  );
}

async function runDeepSeekV4ReplayRegression(params: {
  model: Model<Api>;
  apiKey: string;
  timeoutMs: number;
  progressLabel: string;
}) {
  const noopTool = {
    name: "noop",
    description: "Return ok.",
    parameters: Type.Object({}, { additionalProperties: false }),
  };
  let firstUser = {
    role: "user" as const,
    content: "Call the tool `noop` with {}. Do not write any other text.",
    timestamp: Date.now(),
  };
  let first = await completeSimpleWithTimeout(
    params.model,
    { messages: [firstUser], tools: [noopTool] },
    {
      apiKey: params.apiKey,
      reasoning: resolveTestReasoning(params.model),
      maxTokens: 256,
    },
    params.timeoutMs,
    `${params.progressLabel}: DeepSeek V4 replay first call`,
  );
  let toolCall = first.content.find((block) => block.type === "toolCall");

  for (let i = 0; i < 2 && !toolCall; i += 1) {
    firstUser = {
      role: "user" as const,
      content: "Call the tool `noop` with {}. IMPORTANT: respond with the tool call.",
      timestamp: Date.now(),
    };
    first = await completeSimpleWithTimeout(
      params.model,
      { messages: [firstUser], tools: [noopTool] },
      {
        apiKey: params.apiKey,
        reasoning: resolveTestReasoning(params.model),
        maxTokens: 256,
      },
      params.timeoutMs,
      `${params.progressLabel}: DeepSeek V4 replay retry ${i + 1}`,
    );
    toolCall = first.content.find((block) => block.type === "toolCall");
  }

  expect(toolCall).toBeTruthy();
  if (!toolCall || toolCall.type !== "toolCall") {
    throw new Error("expected DeepSeek V4 tool call");
  }

  const second = await completeSimpleWithTimeout(
    params.model,
    {
      messages: [
        firstUser,
        first,
        {
          role: "toolResult",
          toolCallId: toolCall.id,
          toolName: "noop",
          content: [{ type: "text", text: "ok" }],
          isError: false,
          timestamp: Date.now(),
        },
        {
          role: "user",
          content: "Reply with the word ok.",
          timestamp: Date.now(),
        },
      ],
    },
    {
      apiKey: params.apiKey,
      reasoning: resolveTestReasoning(params.model),
      maxTokens: 256,
    },
    params.timeoutMs,
    `${params.progressLabel}: DeepSeek V4 replay followup`,
  );
  if (second.stopReason === "error") {
    throw new Error(second.errorMessage || "DeepSeek V4 replay followup returned error");
  }
  expect(extractAssistantText(second).length).toBeGreaterThan(0);
}

async function runExtraTurnProbes(params: {
  model: Model<Api>;
  apiKey: string;
  timeoutMs: number;
  progressLabel: string;
}) {
  if (shouldSkipLiveModelExtraProbes(params.model)) {
    logProgress(`${params.progressLabel}: extra probes skipped (known empty route)`);
    return;
  }
  const options = {
    apiKey: params.apiKey,
    reasoning: resolveTestReasoning(params.model),
    maxTokens: 128,
  };
  if (LIVE_FILE_PROBE_ENABLED && !shouldSkipLiveModelFileProbe(params.model)) {
    logProgress(`${params.progressLabel}: file-read probe`);
    const file = await completeSimpleWithTimeout(
      params.model,
      buildLiveModelFileProbeContext({ systemPrompt: resolveLiveSystemPrompt(params.model) }),
      options,
      params.timeoutMs,
      `${params.progressLabel}: file-read probe`,
    );
    if (file.stopReason === "error") {
      throw new Error(file.errorMessage || "file-read probe returned error with no message");
    }
    let fileText = extractAssistantText(file);
    if (!fileProbeTextMatches(fileText)) {
      logProgress(`${params.progressLabel}: file-read probe retry`);
      const retry = await completeSimpleWithTimeout(
        params.model,
        buildLiveModelFileProbeRetryContext({
          systemPrompt: resolveLiveSystemPrompt(params.model),
        }),
        options,
        params.timeoutMs,
        `${params.progressLabel}: file-read probe retry`,
      );
      if (retry.stopReason === "error") {
        throw new Error(
          retry.errorMessage || "file-read probe retry returned error with no message",
        );
      }
      fileText = extractAssistantText(retry);
    }
    if (!fileProbeTextMatches(fileText)) {
      if (fileText.length === 0) {
        logProgress(`${params.progressLabel}: file-read probe skipped (empty response)`);
      } else {
        throw new Error(
          `file-read probe did not return ${LIVE_MODEL_FILE_PROBE_TOKEN}: ${fileText}`,
        );
      }
    }
  } else if (LIVE_FILE_PROBE_ENABLED) {
    logProgress(`${params.progressLabel}: file-read probe skipped (known empty route)`);
  }

  if (!LIVE_IMAGE_PROBE_ENABLED) {
    return;
  }
  if (!modelSupportsImageInput(params.model)) {
    logProgress(`${params.progressLabel}: image probe skipped (no image input)`);
    return;
  }
  if (shouldSkipLiveModelImageProbe(params.model)) {
    logProgress(`${params.progressLabel}: image probe skipped (known empty route)`);
    return;
  }

  logProgress(`${params.progressLabel}: image probe`);
  const image = await completeSimpleWithTimeout(
    params.model,
    buildLiveModelImageProbeContext({ systemPrompt: resolveLiveSystemPrompt(params.model) }),
    options,
    params.timeoutMs,
    `${params.progressLabel}: image probe`,
  );
  if (image.stopReason === "error") {
    throw new Error(image.errorMessage || "image probe returned error with no message");
  }
  const imageText = extractAssistantText(image);
  if (!imageProbeTextMatches(imageText)) {
    if (imageText.length === 0) {
      logProgress(`${params.progressLabel}: image probe skipped (empty response)`);
      return;
    }
    throw new Error(`image probe did not return ok: ${imageText}`);
  }
}

describeLive("live models (profile keys)", () => {
  it(
    "completes across selected models",
    async () => {
      logProgress("[live-models] loading config");
      const cfg = await withLiveStageTimeout(
        Promise.resolve().then(() => loadConfig()),
        "[live-models] load config",
      );
      logProgress("[live-models] preparing models.json");
      await withLiveStageTimeout(
        ensureOpenClawModelsJson(cfg),
        "[live-models] prepare models.json",
        LIVE_MODELS_JSON_TIMEOUT_MS,
      );
      if (!DIRECT_ENABLED) {
        logProgress(
          "[live-models] skipping (set OPENCLAW_LIVE_MODELS=modern|all|<list>; all=modern)",
        );
        return;
      }
      const anthropicKeys = collectAnthropicApiKeys();
      if (anthropicKeys.length > 0) {
        process.env.ANTHROPIC_API_KEY = anthropicKeys[0];
        logProgress(`[live-models] anthropic keys loaded: ${anthropicKeys.length}`);
      }

      const agentDir = resolveOpenClawAgentDir();
      const authStorage = discoverAuthStorage(agentDir);
      logProgress("[live-models] loading model registry");
      const models = await withLiveStageTimeout(
        Promise.resolve().then(() => discoverModels(authStorage, agentDir).getAll()),
        "[live-models] load model registry",
      );

      const rawModels = process.env.OPENCLAW_LIVE_MODELS?.trim();
      const useModern = rawModels === "modern" || rawModels === "all";
      const useExplicit = Boolean(rawModels) && !useModern;
      const filter = useExplicit ? parseModelFilter(rawModels) : null;
      const allowNotFoundSkip = useModern;
      const providers = parseProviderFilter(process.env.OPENCLAW_LIVE_PROVIDERS);
      const perModelTimeoutMs = toInt(process.env.OPENCLAW_LIVE_MODEL_TIMEOUT_MS, 30_000);
      const maxModels = resolveHighSignalLiveModelLimit({
        rawMaxModels: process.env.OPENCLAW_LIVE_MAX_MODELS,
        useExplicitModels: useExplicit,
      });
      const targetMatcher = createLiveTargetMatcher({
        providerFilter: providers,
        modelFilter: filter,
        config: cfg,
        env: process.env,
      });

      const failures: Array<{ model: string; error: string }> = [];
      const skipped: Array<{ model: string; reason: string }> = [];
      const candidates: Array<{
        model: Model<Api>;
        apiKeyInfo: Awaited<ReturnType<typeof getApiKeyForModel>>;
      }> = [];

      for (const model of models) {
        if (shouldSuppressBuiltInModel({ provider: model.provider, id: model.id })) {
          continue;
        }
        if (!targetMatcher.matchesProvider(model.provider)) {
          continue;
        }
        const id = `${model.provider}/${model.id}`;
        if (!targetMatcher.matchesModel(model.provider, model.id)) {
          continue;
        }
        if (!filter && useModern) {
          if (
            shouldExcludeProviderFromDefaultHighSignalLiveSweep({
              provider: model.provider,
              useExplicitModels: useExplicit,
              providerFilter: providers,
              config: cfg,
              env: process.env,
            })
          ) {
            continue;
          }
          if (!isHighSignalLiveModelRef({ provider: model.provider, id: model.id })) {
            continue;
          }
        }
        try {
          const apiKeyInfo = await getApiKeyForModel({
            model,
            cfg,
            credentialPrecedence: LIVE_CREDENTIAL_PRECEDENCE,
          });
          if (REQUIRE_PROFILE_KEYS && !apiKeyInfo.source.startsWith("profile:")) {
            skipped.push({
              model: id,
              reason: `non-profile credential source: ${apiKeyInfo.source}`,
            });
            continue;
          }
          candidates.push({ model, apiKeyInfo });
        } catch (err) {
          skipped.push({ model: id, reason: String(err) });
        }
      }

      if (candidates.length === 0) {
        logProgress("[live-models] no API keys found; skipping");
        return;
      }

      const selectedCandidates = selectHighSignalLiveItems(
        candidates,
        maxModels > 0 ? maxModels : candidates.length,
        (entry) => ({ provider: entry.model.provider, id: entry.model.id }),
        (entry) => entry.model.provider,
      );
      logProgress(`[live-models] selection=${useExplicit ? "explicit" : "high-signal"}`);
      if (selectedCandidates.length < candidates.length) {
        logProgress(
          `[live-models] capped to ${selectedCandidates.length}/${candidates.length} via OPENCLAW_LIVE_MAX_MODELS=${maxModels}`,
        );
      }
      logProgress(`[live-models] running ${selectedCandidates.length} models`);
      logProgress(
        `[live-models] heartbeat=${formatElapsedSeconds(LIVE_HEARTBEAT_MS)} timeout=${formatElapsedSeconds(perModelTimeoutMs)} concurrency=${LIVE_MODEL_CONCURRENCY}`,
      );
      const total = selectedCandidates.length;

      const tasks = selectedCandidates.map((entry, index) => async () => {
        const { model, apiKeyInfo } = entry;
        const id = `${model.provider}/${model.id}`;
        const progressLabel = `[live-models] ${index + 1}/${total} ${id}`;
        const attemptMax =
          model.provider === "anthropic" && anthropicKeys.length > 0 ? anthropicKeys.length : 1;
        for (let attempt = 0; attempt < attemptMax; attempt += 1) {
          if (model.provider === "anthropic" && anthropicKeys.length > 0) {
            process.env.ANTHROPIC_API_KEY = anthropicKeys[attempt];
          }
          const apiKey =
            model.provider === "anthropic" && anthropicKeys.length > 0
              ? anthropicKeys[attempt]
              : requireApiKey(apiKeyInfo, model.provider);
          try {
            // Special regression: OpenAI requires replayed `reasoning` items for tool-only turns.
            if (
              model.provider === "openai" &&
              model.api === "openai-responses" &&
              model.id === "gpt-5.2"
            ) {
              logProgress(`${progressLabel}: tool-only regression`);
              const noopTool = {
                name: "noop",
                description: "Return ok.",
                parameters: Type.Object({}, { additionalProperties: false }),
              };

              let firstUserContent = "Call the tool `noop` with {}. Do not write any other text.";
              let firstUser = {
                role: "user" as const,
                content: firstUserContent,
                timestamp: Date.now(),
              };

              let first = await completeSimpleWithTimeout(
                model,
                { messages: [firstUser], tools: [noopTool] },
                {
                  apiKey,
                  reasoning: resolveTestReasoning(model),
                  maxTokens: 128,
                },
                perModelTimeoutMs,
                `${progressLabel}: tool-only regression first call`,
              );

              let toolCall = first.content.find((b) => b.type === "toolCall");
              let firstText = first.content
                .filter((b) => b.type === "text")
                .map((b) => b.text.trim())
                .join(" ")
                .trim();

              // Occasional flake: model answers in text instead of tool call (or adds text).
              // Retry a couple times with a stronger instruction so we still exercise the tool-only replay path.
              for (let i = 0; i < 2 && (!toolCall || firstText.length > 0); i += 1) {
                firstUserContent =
                  "Call the tool `noop` with {}. IMPORTANT: respond ONLY with the tool call; no other text.";
                firstUser = {
                  role: "user" as const,
                  content: firstUserContent,
                  timestamp: Date.now(),
                };

                first = await completeSimpleWithTimeout(
                  model,
                  { messages: [firstUser], tools: [noopTool] },
                  {
                    apiKey,
                    reasoning: resolveTestReasoning(model),
                    maxTokens: 128,
                  },
                  perModelTimeoutMs,
                  `${progressLabel}: tool-only regression retry ${i + 1}`,
                );

                toolCall = first.content.find((b) => b.type === "toolCall");
                firstText = first.content
                  .filter((b) => b.type === "text")
                  .map((b) => b.text.trim())
                  .join(" ")
                  .trim();
              }

              expect(toolCall).toBeTruthy();
              expect(firstText.length).toBe(0);
              if (!toolCall || toolCall.type !== "toolCall") {
                throw new Error("expected tool call");
              }

              const second = await completeSimpleWithTimeout(
                model,
                {
                  messages: [
                    firstUser,
                    first,
                    {
                      role: "toolResult",
                      toolCallId: toolCall.id,
                      toolName: "noop",
                      content: [{ type: "text", text: "ok" }],
                      isError: false,
                      timestamp: Date.now(),
                    },
                    {
                      role: "user",
                      content: "Reply with the word ok.",
                      timestamp: Date.now(),
                    },
                  ],
                },
                {
                  apiKey,
                  reasoning: resolveTestReasoning(model),
                  // Headroom: reasoning summary can consume most of the output budget.
                  maxTokens: 256,
                },
                perModelTimeoutMs,
                `${progressLabel}: tool-only regression followup`,
              );

              const secondText = second.content
                .filter((b) => b.type === "text")
                .map((b) => b.text.trim())
                .join(" ");
              expect(secondText.length).toBeGreaterThan(0);
              await runExtraTurnProbes({
                model,
                apiKey,
                timeoutMs: perModelTimeoutMs,
                progressLabel,
              });
              logProgress(`${progressLabel}: done`);
              break;
            }

            if (isDeepSeekV4Model(model)) {
              logProgress(`${progressLabel}: DeepSeek V4 replay regression`);
              await runDeepSeekV4ReplayRegression({
                model,
                apiKey,
                timeoutMs: perModelTimeoutMs,
                progressLabel,
              });
              await runExtraTurnProbes({
                model,
                apiKey,
                timeoutMs: perModelTimeoutMs,
                progressLabel,
              });
              logProgress(`${progressLabel}: done`);
              break;
            }

            logProgress(`${progressLabel}: prompt`);
            const ok = await completeOkWithRetry({
              model,
              apiKey,
              timeoutMs: perModelTimeoutMs,
              progressLabel,
            });

            if (ok.res.stopReason === "error") {
              const msg = ok.res.errorMessage ?? "";
              if (allowNotFoundSkip && isModelNotFoundErrorMessage(msg)) {
                skipped.push({ model: id, reason: msg });
                logProgress(`${progressLabel}: skip (model not found)`);
                break;
              }
              throw new Error(msg || "model returned error with no message");
            }

            if (
              ok.text.length === 0 &&
              (model.provider === "google" || model.provider === "google-gemini-cli")
            ) {
              skipped.push({
                model: id,
                reason: "no text returned (likely unavailable model id)",
              });
              logProgress(`${progressLabel}: skip (google model not found)`);
              break;
            }
            if (
              ok.text.length === 0 &&
              (model.provider === "openrouter" ||
                model.provider === "opencode" ||
                model.provider === "opencode-go")
            ) {
              skipped.push({
                model: id,
                reason: "no text returned (provider returned empty content)",
              });
              logProgress(`${progressLabel}: skip (empty response)`);
              break;
            }
            if (
              ok.text.length === 0 &&
              allowNotFoundSkip &&
              (model.provider === "fireworks" ||
                model.provider === "google-antigravity" ||
                model.provider === "minimax" ||
                model.provider === "openai-codex" ||
                model.provider === "xai" ||
                model.provider === "zai")
            ) {
              skipped.push({
                model: id,
                reason: "no text returned (provider returned empty content)",
              });
              logProgress(`${progressLabel}: skip (empty response)`);
              break;
            }
            expect(ok.text.length).toBeGreaterThan(0);
            await runExtraTurnProbes({
              model,
              apiKey,
              timeoutMs: perModelTimeoutMs,
              progressLabel,
            });
            logProgress(`${progressLabel}: done`);
            break;
          } catch (err) {
            const message = String(err);
            if (
              model.provider === "anthropic" &&
              isAnthropicRateLimitError(message) &&
              attempt + 1 < attemptMax
            ) {
              logProgress(`${progressLabel}: rate limit, retrying with next key`);
              continue;
            }
            if (model.provider === "anthropic" && isAnthropicRateLimitError(message)) {
              skipped.push({ model: id, reason: message });
              logProgress(`${progressLabel}: skip (anthropic rate limit)`);
              break;
            }
            if (model.provider === "anthropic" && isAnthropicBillingError(message)) {
              if (attempt + 1 < attemptMax) {
                logProgress(`${progressLabel}: billing issue, retrying with next key`);
                continue;
              }
              skipped.push({ model: id, reason: message });
              logProgress(`${progressLabel}: skip (anthropic billing)`);
              break;
            }
            if (
              (model.provider === "google" || model.provider === "google-gemini-cli") &&
              isGoogleModelNotFoundError(err)
            ) {
              skipped.push({ model: id, reason: message });
              logProgress(`${progressLabel}: skip (google model not found)`);
              break;
            }
            if (
              allowNotFoundSkip &&
              model.provider === "minimax" &&
              message.includes("request ended without sending any chunks")
            ) {
              skipped.push({ model: id, reason: message });
              logProgress(`${progressLabel}: skip (minimax empty response)`);
              break;
            }
            if (
              allowNotFoundSkip &&
              (model.provider === "minimax" ||
                model.provider === "zai" ||
                model.provider === "openrouter") &&
              isRateLimitErrorMessage(message)
            ) {
              skipped.push({ model: id, reason: message });
              logProgress(`${progressLabel}: skip (rate limit)`);
              break;
            }
            if (
              allowNotFoundSkip &&
              (model.provider === "opencode" || model.provider === "opencode-go") &&
              isRateLimitErrorMessage(message)
            ) {
              skipped.push({ model: id, reason: message });
              logProgress(`${progressLabel}: skip (rate limit)`);
              break;
            }
            if (
              allowNotFoundSkip &&
              model.provider === "openai-codex" &&
              isRefreshTokenReused(message)
            ) {
              skipped.push({ model: id, reason: message });
              logProgress(`${progressLabel}: skip (codex refresh token reused)`);
              break;
            }
            if (
              allowNotFoundSkip &&
              model.provider === "openai-codex" &&
              isAccountIdExtractionError(message)
            ) {
              skipped.push({ model: id, reason: message });
              logProgress(`${progressLabel}: skip (codex account id extraction)`);
              break;
            }
            if (
              allowNotFoundSkip &&
              model.provider === "openai-codex" &&
              isChatGPTUsageLimitErrorMessage(message)
            ) {
              skipped.push({ model: id, reason: message });
              logProgress(`${progressLabel}: skip (chatgpt usage limit)`);
              break;
            }
            if (
              allowNotFoundSkip &&
              model.provider === "openai-codex" &&
              isInstructionsRequiredError(message)
            ) {
              skipped.push({ model: id, reason: message });
              logProgress(`${progressLabel}: skip (instructions required)`);
              break;
            }
            if (
              allowNotFoundSkip &&
              model.provider === "openai-codex" &&
              isOpenAiCodexHtmlInterruption(message)
            ) {
              skipped.push({ model: id, reason: message });
              logProgress(`${progressLabel}: skip (codex html interruption)`);
              break;
            }
            if (allowNotFoundSkip && isModelTimeoutError(message)) {
              skipped.push({ model: id, reason: message });
              logProgress(`${progressLabel}: skip (timeout)`);
              break;
            }
            if (allowNotFoundSkip && isProviderUnavailableErrorMessage(message)) {
              skipped.push({ model: id, reason: message });
              logProgress(`${progressLabel}: skip (provider unavailable)`);
              break;
            }
            if (allowNotFoundSkip && isModelNotFoundErrorMessage(message)) {
              skipped.push({ model: id, reason: message });
              logProgress(`${progressLabel}: skip (model not found)`);
              break;
            }
            if (allowNotFoundSkip && isAudioOnlyModelErrorMessage(message)) {
              skipped.push({ model: id, reason: message });
              logProgress(`${progressLabel}: skip (audio-only model)`);
              break;
            }
            if (allowNotFoundSkip && isUnsupportedReasoningEffortErrorMessage(message)) {
              skipped.push({ model: id, reason: message });
              logProgress(`${progressLabel}: skip (reasoning unsupported)`);
              break;
            }
            if (allowNotFoundSkip && isUnsupportedThinkingToggleErrorMessage(message)) {
              skipped.push({ model: id, reason: message });
              logProgress(`${progressLabel}: skip (thinking toggle unsupported)`);
              break;
            }
            if (allowNotFoundSkip && isUnsupportedPlanErrorMessage(message)) {
              skipped.push({ model: id, reason: message });
              logProgress(`${progressLabel}: skip (plan unsupported)`);
              break;
            }
            if (
              allowNotFoundSkip &&
              model.provider === "ollama" &&
              isOllamaUnavailableErrorMessage(message)
            ) {
              skipped.push({ model: id, reason: message });
              logProgress(`${progressLabel}: skip (ollama unavailable)`);
              break;
            }
            logProgress(`${progressLabel}: failed`);
            failures.push({ model: id, error: message });
            break;
          }
        }
      });

      await runTasksWithConcurrency({
        tasks,
        limit: LIVE_MODEL_CONCURRENCY,
      });

      if (failures.length > 0) {
        const preview = formatFailurePreview(failures, 20);
        throw new Error(
          `live model failures (${failures.length}, showing ${Math.min(failures.length, 20)}):\n${preview}`,
        );
      }

      void skipped;
    },
    LIVE_TEST_TIMEOUT_MS,
  );
});

Messung V0.5 in Prozent

¤ Dauer der Verarbeitung: 0.15 Sekunden (vorverarbeitet am 2026-06-09) ¤

Wurzel

Suchen

PVS Prover

Isabelle Prover

NIST Cobol Testsuite

Cephes Mathematical Library

Vienna Development Method

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.