/**
 * Reports whether an error message says that reasoning must stay enabled
 * for the request.
 *
 * Matching is done on the lowercased message and covers the phrasings
 * "reasoning is mandatory", "reasoning is required", "requires reasoning",
 * and any message that mentions both "reasoning" and "cannot be disabled".
 *
 * @param raw - Raw error text.
 * @returns `true` when one of the phrasings above is present; `false`
 *   otherwise, including for empty input.
 */
export function isReasoningConstraintErrorMessage(raw: string): boolean {
  if (!raw) {
    return false;
  }
  const lower = normalizeLowercaseStringOrEmpty(raw);
  return (
    lower.includes("reasoning is mandatory") ||
    lower.includes("reasoning is required") ||
    lower.includes("requires reasoning") ||
    (lower.includes("reasoning") && lower.includes("cannot be disabled"))
  );
}
/**
 * Reports whether an error message mentions a tokens-per-minute limit,
 * either spelled out ("tokens per minute") or as the standalone word "tpm".
 *
 * @param raw - Raw error text.
 * @returns `true` when a TPM hint is present in the normalized message.
 */
function hasRateLimitTpmHint(raw: string): boolean {
  const normalized = normalizeLowercaseStringOrEmpty(raw);
  if (normalized.includes("tokens per minute")) {
    return true;
  }
  // Word boundaries keep "tpm" from matching inside longer identifiers.
  return /\btpm\b/i.test(normalized);
}
/**
 * Reports whether an error message matches one of the known context-overflow
 * phrasings (prompt / request larger than the model's context window).
 *
 * TPM rate-limit messages and reasoning-constraint messages are excluded
 * before any overflow phrase is checked.
 *
 * @param errorMessage - Raw error text; may be undefined or empty.
 * @returns `true` when a known overflow phrase is found; `false` otherwise,
 *   including for missing or empty input.
 */
export function isContextOverflowError(errorMessage?: string): boolean {
  if (!errorMessage) {
    return false;
  }
  const lower = normalizeLowercaseStringOrEmpty(errorMessage);

  // Groq uses 413 for TPM (tokens per minute) limits, which is a rate limit, not context overflow.
  if (hasRateLimitTpmHint(errorMessage)) {
    return false;
  }

  if (isReasoningConstraintErrorMessage(errorMessage)) {
    return false;
  }

  const hasRequestSizeExceeds = lower.includes("request size exceeds");
  // "context length" also covers "maximum context length", so no separate check is needed.
  const hasContextWindow = lower.includes("context window") || lower.includes("context length");

  return (
    lower.includes("request_too_large") ||
    (lower.includes("invalid_argument") && lower.includes("maximum number of tokens")) ||
    lower.includes("request exceeds the maximum size") ||
    lower.includes("context length exceeded") ||
    lower.includes("maximum context length") ||
    lower.includes("prompt is too long") ||
    lower.includes("prompt too long") ||
    lower.includes("exceeds model context window") ||
    lower.includes("model token limit") ||
    (lower.includes("input exceeds") && lower.includes("maximum number of tokens")) ||
    (hasRequestSizeExceeds && hasContextWindow) ||
    lower.includes("context overflow:") ||
    lower.includes("exceed context limit") ||
    lower.includes("exceeds the model's maximum context") ||
    (lower.includes("max_tokens") && lower.includes("exceed") && lower.includes("context")) ||
    (lower.includes("input length") && lower.includes("exceed") && lower.includes("context")) ||
    (lower.includes("413") && lower.includes("too large")) ||
    // Anthropic API and OpenAI-compatible providers (e.g. ZhipuAI/GLM) return this stop reason
    // when the context window is exceeded. pi-ai surfaces it as
    // "Unhandled stop reason: model_context_window_exceeded".
    lower.includes("context_window_exceeded") ||
    // Chinese proxy error messages for context overflow (matched on the raw message).
    errorMessage.includes("上下文过长") ||
    errorMessage.includes("上下文超出") ||
    errorMessage.includes("上下文长度超") ||
    errorMessage.includes("超出最大上下文") ||
    errorMessage.includes("请压缩上下文") ||
    // Provider-specific patterns (Bedrock, Azure, Ollama, Mistral, Cohere, etc.)
    matchesProviderContextOverflow(errorMessage)
  );
}
/** Matches "context window ... too small" / "context window ... minimum is" messages. */
const CONTEXT_WINDOW_TOO_SMALL_RE = /context window.*(too small|minimum is)/i;

/**
 * Broad heuristic for overflow-shaped messages: "context ... overflow", a
 * context window / prompt described as too large, exceeded, over a limit, etc.,
 * or a request/input whose context, window, length or tokens are described
 * the same way.
 */
const CONTEXT_OVERFLOW_HINT_RE =
  /context.*overflow|context window.*(too (?:large|long)|exceed|over|limit|max(?:imum)?|requested|sent|tokens)|prompt.*(too (?:large|long)|exceed|over|limit|max(?:imum)?)|(?:request|input).*(?:context|window|length|token).*(too (?:large|long)|exceed|over|limit|max(?:imum)?)/i;

/** Matches rate-limit / quota / throttling wording, including a bare "429". */
const RATE_LIMIT_HINT_RE =
  /rate limit|too many requests|requests per (?:minute|hour|day)|quota|throttl|429\b|tokens per day/i;
/**
 * Heuristic variant of the context-overflow check: first rules out error
 * classes that can look like overflow, then accepts either an explicit
 * overflow match or the broad overflow hint regex.
 *
 * Guards are order-sensitive; each early return prevents a broader pattern
 * further down from misclassifying the message.
 *
 * @param errorMessage - Raw error text; may be undefined or empty.
 * @returns `true` when the message is likely a context overflow; `false`
 *   otherwise, including for missing or empty input.
 */
export function isLikelyContextOverflowError(errorMessage?: string): boolean {
  if (!errorMessage) {
    return false;
  }

  // Groq uses 413 for TPM (tokens per minute) limits, which is a rate limit, not context overflow.
  if (hasRateLimitTpmHint(errorMessage)) {
    return false;
  }

  if (isReasoningConstraintErrorMessage(errorMessage)) {
    return false;
  }

  // Billing/quota errors can contain patterns like "request size exceeds" or
  // "maximum token limit exceeded" that match the context overflow heuristic.
  // Billing is a more specific error class — exclude it early.
  if (isBillingErrorMessage(errorMessage)) {
    return false;
  }

  if (CONTEXT_WINDOW_TOO_SMALL_RE.test(errorMessage)) {
    return false;
  }

  // Rate limit errors can match the broad CONTEXT_OVERFLOW_HINT_RE pattern
  // (e.g., "request reached organization TPD rate limit" matches request.*limit).
  // Exclude them before checking context overflow heuristics.
  if (isRateLimitErrorMessage(errorMessage)) {
    return false;
  }

  if (isContextOverflowError(errorMessage)) {
    return true;
  }

  // Remaining rate-limit wording must not fall through to the broad hint regex.
  if (RATE_LIMIT_HINT_RE.test(errorMessage)) {
    return false;
  }

  return CONTEXT_OVERFLOW_HINT_RE.test(errorMessage);
}
/**
 * Reports whether an error message describes a compaction/summarization
 * failure caused by a context overflow.
 *
 * The message must mention compaction (or "summarization failed") AND look
 * like a context overflow.
 *
 * @param errorMessage - Raw error text; may be undefined or empty.
 * @returns `true` when both conditions hold; `false` otherwise, including
 *   for missing or empty input.
 */
export function isCompactionFailureError(errorMessage?: string): boolean {
  if (!errorMessage) {
    return false;
  }
  const lower = normalizeLowercaseStringOrEmpty(errorMessage);

  // "compaction" also covers "auto-compaction" and "compaction failed".
  const hasCompactionTerm =
    lower.includes("summarization failed") || lower.includes("compaction");
  if (!hasCompactionTerm) {
    return false;
  }

  // Treat any likely overflow shape as a compaction failure when compaction terms are present.
  // Providers often vary wording (e.g. "context window exceeded") across APIs.
  if (isLikelyContextOverflowError(errorMessage)) {
    return true;
  }

  // Keep explicit fallback for bare "context overflow" strings.
  return lower.includes("context overflow");
}
/**
 * Patterns whose first capture group holds the token count reported in an
 * overflow error message. Counts may contain thousands separators (",").
 */
const OBSERVED_OVERFLOW_TOKEN_PATTERNS = [
  /prompt is too long:\s*([\d,]+)\s+tokens\s*>\s*[\d,]+\s+maximum/i,
  /requested\s+([\d,]+)\s+tokens/i,
  /resulted in\s+([\d,]+)\s+tokens/i,
];

/**
 * Extracts the token count reported in an overflow error message.
 *
 * Patterns are tried in declaration order; the first one that yields a
 * finite, positive number wins.
 *
 * @param errorMessage - Raw error text; may be undefined or empty.
 * @returns The reported token count as an integer, or `undefined` when the
 *   message is missing or no pattern yields a positive count.
 */
export function extractObservedOverflowTokenCount(errorMessage?: string): number | undefined {
  if (!errorMessage) {
    return undefined;
  }

  for (const pattern of OBSERVED_OVERFLOW_TOKEN_PATTERNS) {
    const match = errorMessage.match(pattern);
    // Strip thousands separators; a capture made only of commas becomes "" and is skipped.
    const rawCount = match?.[1]?.replace(/,/g, "");
    if (!rawCount) {
      continue;
    }
    const parsed = Number(rawCount);
    if (Number.isFinite(parsed) && parsed > 0) {
      return Math.floor(parsed);
    }
  }

  // No pattern produced a usable count.
  return undefined;
}
/**
 * Reports whether an HTTP status is 408, 499, or any 5xx code.
 *
 * @param status - HTTP status code, or `undefined` when none is known.
 * @returns `true` for 408, 499 and 500–599; `false` otherwise.
 */
function isTransportHtmlErrorStatus(status: number | undefined): boolean {
  if (typeof status !== "number") {
    return false;
  }
  if (status === 408 || status === 499) {
    return true;
  }
  return status >= 500 && status < 600;
}
const diskSpaceCopy = formatDiskSpaceErrorCopy(raw); if (diskSpaceCopy) { return diskSpaceCopy;
}
if (providerRuntimeFailureKind === "auth_refresh") { return"Authentication refresh failed. Re-authenticate this provider and try again.";
}
if (providerRuntimeFailureKind === "refresh_contention") { return ( "Authentication refresh is already in progress elsewhere and this attempt " + "timed out waiting for it. Retry in a moment."
);
}
if (providerRuntimeFailureKind === "refresh_timeout") { return ( "Authentication refresh timed out before the provider completed. " + "Retry in a moment; re-authenticate only if it keeps failing."
);
}
if (providerRuntimeFailureKind === "callback_timeout") { return ( "Browser OAuth did not complete before manual fallback kicked in. " + "Retry the login flow and paste the redirect URL if prompted."
);
}
if (providerRuntimeFailureKind === "callback_validation") { return ( "Browser OAuth returned an invalid or incomplete callback. " + "Retry the login flow and make sure the full redirect URL is pasted if prompted."
);
}
if (providerRuntimeFailureKind === "auth_scope") { return ( "Authentication is missing the required OpenAI Codex scopes. " + "Re-run OpenAI/Codex login and try again."
);
}
if (providerRuntimeFailureKind === "auth_html_403") { return ( "Authentication failed with an HTML 403 response from the provider. " + "Re-authenticate and verify your provider account access."
);
}
if (providerRuntimeFailureKind === "upstream_html") { return ( "The provider returned an HTML error page instead of an API response. " + "This usually means a CDN or gateway (e.g. Cloudflare) blocked the request. " + "Retry in a moment or check provider status."
);
}
if (providerRuntimeFailureKind === "proxy") { return"LLM request failed: proxy or tunnel configuration blocked the provider request.";
}
if (isContextOverflowError(raw)) { return ( "Context overflow: prompt too large for the model. " + "Try /reset (or /new) to start a fresh session, or use a larger-context model."
);
}
if (isReasoningConstraintErrorMessage(raw)) { return ( "Reasoning is required for this model endpoint. " + "Use /think minimal (or any non-off level) and try again."
);
}
if (isInvalidStreamingEventOrderError(raw)) { return"LLM request failed: provider returned an invalid streaming response. Please try again.";
}
// Catch role ordering errors - including JSON-wrapped and "400" prefix variants if (
/incorrect role information|roles must alternate|400.*role|"message".*role.*information/i.test(
raw,
)
) { return ( "Message ordering conflict - please try again. " + "If this persists, use /new to start a fresh session."
);
}
if (isMissingToolCallInputError(raw)) { return ( "Session history looks corrupted (tool call input missing). " + "Use /new to start a fresh session. " + "If this keeps happening, reset the session or delete the corrupted session transcript."
);
}
const transientCopy = formatRateLimitOrOverloadedErrorCopy(raw); if (transientCopy) { return transientCopy;
}
const transportCopy = formatTransportErrorCopy(raw); if (transportCopy) { return transportCopy;
}
if (isTimeoutErrorMessage(raw)) { return"LLM request timed out.";
}
if (isBillingErrorMessage(raw)) { return formatBillingErrorMessage(opts?.provider, opts?.model ?? msg.model);
}
if (providerRuntimeFailureKind === "schema") { return"LLM request failed: provider rejected the request schema or tool payload.";
}
if (providerRuntimeFailureKind === "replay_invalid") { return ( "Session history or replay state is invalid. " + "Use /new to start a fresh session and try again."
);
}
if (isLikelyHttpErrorText(raw) || isRawApiErrorPayload(raw)) { return formatRawAssistantErrorForUi(raw);
}
// Never return raw unhandled errors - log for debugging but return safe message if (raw.length > 600) {
log.warn(`Long error truncated: ${raw.slice(0, 200)}`);
} return raw.length > 600 ? `${raw.slice(0, 600)}…` : raw;
}
// Transient signal patterns for api_error payloads. Only treat an api_error as
// retryable when the message text itself indicates a transient server issue.
// Non-transient api_error payloads (context overflow, validation/schema errors)
// must NOT be classified as timeout.
const API_ERROR_TRANSIENT_SIGNALS_RE =
  /internal server error|overload|temporarily unavailable|service unavailable|unknown error|server error|bad gateway|gateway timeout|upstream error|backend error|try again later|temporarily.+unable|unexpected error/i;
/**
 * Reports whether a raw error is a JSON `api_error` payload whose message
 * signals a transient server-side problem.
 *
 * @param raw - Raw error text.
 * @returns `true` only when the payload carries `"type":"api_error"`, is not
 *   a billing or auth error, and its text matches a transient signal;
 *   `false` otherwise, including for empty input.
 */
function isJsonApiInternalServerError(raw: string): boolean {
  if (!raw) {
    return false;
  }
  const value = normalizeLowercaseStringOrEmpty(raw);

  // Providers wrap transient 5xx errors in JSON payloads like:
  // {"type":"error","error":{"type":"api_error","message":"Internal server error"}}
  // Non-standard providers (e.g. MiniMax) may use different message text:
  // {"type":"api_error","message":"unknown error, 520 (1000)"}
  if (!value.includes('"type":"api_error"')) {
    return false;
  }

  // Billing and auth errors can also carry "type":"api_error". Exclude them so
  // the more specific classifiers further down the chain handle them correctly.
  if (isBillingErrorMessage(raw) || isAuthErrorMessage(raw) || isAuthPermanentErrorMessage(raw)) {
    return false;
  }

  // Only match when the message contains a transient signal. api_error payloads
  // with non-transient messages (e.g. context overflow, schema validation) should
  // fall through to more specific classifiers or remain unclassified.
  return API_ERROR_TRANSIENT_SIGNALS_RE.test(raw);
}
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.