|
|
|
|
Quelle realtime-transcription-provider.ts
Sprache: JAVA
|
|
Spracherkennung für: .ts vermutete Sprache: Unknown {[0] [0] [0]} [Methode: Schwerpunktbildung, einfache Gewichte, sechs Dimensionen]
import {
createRealtimeTranscriptionWebSocketSession,
type RealtimeTranscriptionProviderConfig,
type RealtimeTranscriptionProviderPlugin,
type RealtimeTranscriptionSession,
type RealtimeTranscriptionSessionCreateRequest,
type RealtimeTranscriptionWebSocketTransport,
} from "openclaw/plugin-sdk/realtime-transcription";
import { normalizeResolvedSecretInputString } from "openclaw/plugin-sdk/secret-input";
import { normalizeOptionalString } from "openclaw/plugin-sdk/text-runtime";
import { XAI_BASE_URL } from "./model-definitions.js";
type XaiRealtimeTranscriptionEncoding = "pcm" | "mulaw" | "alaw";
type XaiRealtimeTranscriptionProviderConfig = {
apiKey?: string;
baseUrl?: string;
sampleRate?: number;
encoding?: XaiRealtimeTranscriptionEncoding;
interimResults?: boolean;
endpointingMs?: number;
language?: string;
};
type XaiRealtimeTranscriptionSessionConfig = RealtimeTranscriptionSessionCreateRequest & {
apiKey: string;
baseUrl: string;
sampleRate: number;
encoding: XaiRealtimeTranscriptionEncoding;
interimResults: boolean;
endpointingMs: number;
language?: string;
};
type XaiRealtimeTranscriptionEvent = {
type?: string;
text?: string;
transcript?: string;
is_final?: boolean;
speech_final?: boolean;
error?: unknown;
message?: string;
};
const XAI_REALTIME_STT_DEFAULT_SAMPLE_RATE = 8000;
const XAI_REALTIME_STT_DEFAULT_ENCODING: XaiRealtimeTranscriptionEncoding = "mulaw";
const XAI_REALTIME_STT_DEFAULT_ENDPOINTING_MS = 800;
const XAI_REALTIME_STT_CONNECT_TIMEOUT_MS = 10_000;
const XAI_REALTIME_STT_CLOSE_TIMEOUT_MS = 5_000;
const XAI_REALTIME_STT_MAX_RECONNECT_ATTEMPTS = 5;
const XAI_REALTIME_STT_RECONNECT_DELAY_MS = 1000;
const XAI_REALTIME_STT_MAX_QUEUED_BYTES = 2 * 1024 * 1024;
function readRecord(value: unknown): Record<string, unknown> | undefined {
return value && typeof value === "object" ? (value as Record<string, unknown>) : undefined;
}
function readNestedXaiConfig(rawConfig: RealtimeTranscriptionProviderConfig) {
const raw = readRecord(rawConfig);
const providers = readRecord(raw?.providers);
return readRecord(providers?.xai ?? raw?.xai ?? raw) ?? {};
}
function readFiniteNumber(value: unknown): number | undefined {
const next =
typeof value === "number"
? value
: typeof value === "string"
? Number.parseFloat(value)
: undefined;
return Number.isFinite(next) ? next : undefined;
}
function readBoolean(value: unknown): boolean | undefined {
if (typeof value === "boolean") {
return value;
}
if (typeof value !== "string") {
return undefined;
}
const normalized = value.trim().toLowerCase();
if (["1", "true", "yes", "on"].includes(normalized)) {
return true;
}
if (["0", "false", "no", "off"].includes(normalized)) {
return false;
}
return undefined;
}
function normalizeEncoding(value: unknown): XaiRealtimeTranscriptionEncoding | undefined {
const normalized = normalizeOptionalString(value)?.toLowerCase();
if (!normalized) {
return undefined;
}
if (normalized === "ulaw" || normalized === "g711_ulaw" || normalized === "g711-mulaw") {
return "mulaw";
}
if (normalized === "g711_alaw" || normalized === "g711-alaw") {
return "alaw";
}
if (normalized === "pcm" || normalized === "mulaw" || normalized === "alaw") {
return normalized;
}
throw new Error(`Invalid xAI realtime transcription encoding: ${normalized}`);
}
function normalizeXaiRealtimeBaseUrl(value?: string): string {
return normalizeOptionalString(value ?? process.env.XAI_BASE_URL) ?? XAI_BASE_URL;
}
function toXaiRealtimeWsUrl(config: XaiRealtimeTranscriptionSessionConfig): string {
const url = new URL(normalizeXaiRealtimeBaseUrl(config.baseUrl));
url.protocol = url.protocol === "http:" ? "ws:" : "wss:";
url.pathname = `${url.pathname.replace(/\/+$/, "")}/stt`;
url.searchParams.set("sample_rate", String(config.sampleRate));
url.searchParams.set("encoding", config.encoding);
url.searchParams.set("interim_results", String(config.interimResults));
url.searchParams.set("endpointing", String(config.endpointingMs));
if (config.language) {
url.searchParams.set("language", config.language);
}
return url.toString();
}
function normalizeProviderConfig(
config: RealtimeTranscriptionProviderConfig,
): XaiRealtimeTranscriptionProviderConfig {
const raw = readNestedXaiConfig(config);
return {
apiKey: normalizeResolvedSecretInputString({
value: raw.apiKey,
path: "plugins.entries.voice-call.config.streaming.providers.xai.apiKey",
}),
baseUrl: normalizeOptionalString(raw.baseUrl),
sampleRate: readFiniteNumber(raw.sampleRate ?? raw.sample_rate),
encoding: normalizeEncoding(raw.encoding),
interimResults: readBoolean(raw.interimResults ?? raw.interim_results),
endpointingMs: readFiniteNumber(raw.endpointingMs ?? raw.endpointing ?? raw.silenceDurationMs),
language: normalizeOptionalString(raw.language),
};
}
function readErrorDetail(value: unknown): string {
if (typeof value === "string") {
return value;
}
const record = readRecord(value);
const message = normalizeOptionalString(record?.message);
const code = normalizeOptionalString(record?.code);
return message ?? code ?? "xAI realtime transcription error";
}
function readTranscriptText(event: XaiRealtimeTranscriptionEvent): string | undefined {
return normalizeOptionalString(event.text ?? event.transcript);
}
function createXaiRealtimeTranscriptionSession(
config: XaiRealtimeTranscriptionSessionConfig,
): RealtimeTranscriptionSession {
let lastTranscript: string | undefined;
let speechStarted = false;
const emitTranscript = (text: string) => {
if (text === lastTranscript) {
return;
}
lastTranscript = text;
config.onTranscript?.(text);
};
const handleEvent = (
event: XaiRealtimeTranscriptionEvent,
transport: RealtimeTranscriptionWebSocketTransport,
) => {
if (event.type === "transcript.created") {
transport.markReady();
return;
}
if (!transport.isReady() && event.type === "error") {
transport.failConnect(new Error(readErrorDetail(event.error ?? event.message)));
return;
}
switch (event.type) {
case "transcript.partial": {
const text = readTranscriptText(event);
if (!text) {
return;
}
if (!speechStarted) {
speechStarted = true;
config.onSpeechStart?.();
}
if (event.is_final && event.speech_final) {
emitTranscript(text);
speechStarted = false;
return;
}
config.onPartial?.(text);
return;
}
case "transcript.done": {
const text = readTranscriptText(event);
if (text) {
emitTranscript(text);
}
transport.closeNow();
return;
}
case "error":
config.onError?.(new Error(readErrorDetail(event.error ?? event.message)));
return;
default:
return;
}
};
return createRealtimeTranscriptionWebSocketSession<XaiRealtimeTranscriptionEvent>({
providerId: "xai",
callbacks: config,
url: () => toXaiRealtimeWsUrl(config),
headers: { Authorization: `Bearer ${config.apiKey}` },
connectTimeoutMs: XAI_REALTIME_STT_CONNECT_TIMEOUT_MS,
closeTimeoutMs: XAI_REALTIME_STT_CLOSE_TIMEOUT_MS,
maxReconnectAttempts: XAI_REALTIME_STT_MAX_RECONNECT_ATTEMPTS,
reconnectDelayMs: XAI_REALTIME_STT_RECONNECT_DELAY_MS,
maxQueuedBytes: XAI_REALTIME_STT_MAX_QUEUED_BYTES,
connectTimeoutMessage: "xAI realtime transcription connection timeout",
reconnectLimitMessage: "xAI realtime transcription reconnect limit reached",
sendAudio: (audio, transport) => {
transport.sendBinary(audio);
},
onClose: (transport) => {
transport.sendJson({ type: "audio.done" });
},
onMessage: handleEvent,
});
}
export function buildXaiRealtimeTranscriptionProvider(): RealtimeTranscriptionProviderPlugin {
return {
id: "xai",
label: "xAI Realtime Transcription",
aliases: ["xai-realtime", "grok-stt-streaming"],
autoSelectOrder: 25,
resolveConfig: ({ rawConfig }) => normalizeProviderConfig(rawConfig),
isConfigured: ({ providerConfig }) =>
Boolean(normalizeProviderConfig(providerConfig).apiKey || process.env.XAI_API_KEY),
createSession: (req) => {
const config = normalizeProviderConfig(req.providerConfig);
const apiKey = config.apiKey || process.env.XAI_API_KEY;
if (!apiKey) {
throw new Error("xAI API key missing");
}
return createXaiRealtimeTranscriptionSession({
...req,
apiKey,
baseUrl: normalizeXaiRealtimeBaseUrl(config.baseUrl),
sampleRate: config.sampleRate ?? XAI_REALTIME_STT_DEFAULT_SAMPLE_RATE,
encoding: config.encoding ?? XAI_REALTIME_STT_DEFAULT_ENCODING,
interimResults: config.interimResults ?? true,
endpointingMs: config.endpointingMs ?? XAI_REALTIME_STT_DEFAULT_ENDPOINTING_MS,
language: config.language,
});
},
};
}
¤ Dauer der Verarbeitung: 0.28 Sekunden
(vorverarbeitet am 2026-04-27)
¤
*© Formatika GbR, Deutschland
|
|
2026-05-26
|
|
|
|
|