Quelle HttpInference.sys.mjs

Sprache: C

Spracherkennung für: .mjs vermutete Sprache: Unknown {[0] [0] [0]} [Methode: Schwerpunktbildung, einfache Gewichte, sechs Dimensionen]

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */

// Guide models to respond with readable / parseable JSON-ish grammar. Allow
// some whitespace to avoid token resampling while ensuring escaped strings.
const JSON_GRAMMAR = `root ::= ws? "{" ws* key ( "," ws* key )* ws* "}" ws?
arr ::= "[" ws* str ( "," ws* str )* ws* "]" ws?
key ::= str ":" ws? val
str ::= ["] ( "\\\\" ["n] | [^\\\\"\\n] )+ ["] ws?
val ::= arr | str
ws  ::= [ \n]
`;

/**
* Handle various kinds of ai/ml http inference APIs.
*/
export const HttpInference = {
  /**
   * GenAI prompt completion
   *
   * @param {object} config options for the request
   * @param {string} config.endpoint http completion API
   * @param {string} config.prompt to send
   * @param {string} config.bearer optional token for some endpoints
   * @param {string} config.model optional for some endpoints
   * @param {Function} config.onStream optional callback for streaming response
   * @param {object} context optional placeholder values
   * @returns {Promise<string>} response of the completion request
   */
  async completion(
    { bearer, endpoint, model, prompt, onStream },
    context = {}
  ) {
    let request, response;

    // Try to get JSON response if prompt includes "json"
    const expectJSON = prompt.search(/\bjson\b/i) >= 0;

    // Conditionally add prompt context if needed and allowed
    Object.entries(context).forEach(([key, val]) => {
      const placeholder = `%${key}%`;
      if (prompt.includes(placeholder)) {
        prompt = prompt.replace(placeholder, JSON.stringify(val));
      }
    });

    let streaming = !!onStream;

    // TODO: Pick a body format in a smarter way
    const body = {};
    if (endpoint.endsWith("/v1/chat/completions")) {
      body.messages = [{ content: prompt, role: "user" }];
      body.max_tokens = 1024;
      body.model = model;
      if (streaming) {
        body.stream = true;
      }
      if (expectJSON) {
        // TODO: Better deciding when to include grammar
        if (endpoint.includes("localhost")) {
          body.grammar = JSON_GRAMMAR;
        }
        body.response_format = { type: "json_object" };
      }
    } else if (endpoint.endsWith(":predict")) {
      body.instances = [{ content: prompt }];
      body.parameters = { maxOutputTokens: 1024 };
      streaming = false;
    } else if (endpoint.endsWith(":streamGenerateContent")) {
      body.contents = [{ parts: [{ text: prompt }], role: "user" }];
      body.generation_config = { maxOutputTokens: 1024 };
      // This endpoint doesn't do server-sent events format
      streaming = false;
    } else if (endpoint.endsWith("/completion")) {
      body.prompt = prompt;
      if (streaming) {
        body.stream = true;
      }
      if (expectJSON) {
        body.grammar = JSON_GRAMMAR;
      }
    } else {
      body.model = model;
      body.prompt = prompt;
      streaming = false;
    }

    const headers = {
      "Content-Type": "application/json",
    };
    if (bearer) {
      headers.Authorization = `Bearer ${bearer}`;
    }

    let ret = "";
    try {
      request = await fetch(endpoint, {
        body: JSON.stringify(body),
        headers,
        method: "POST",
      });

      if (request.status != 200) {
        throw await request.text();
      }

      if (streaming) {
        const reader = request.body.getReader();
        const decoder = new TextDecoder();
        // eslint-disable-next-line no-constant-condition
        while (true) {
          const { done, value } = await reader.read();
          if (done) {
            break;
          }

          // Read the JSON data of each server-sent event
          const lines = decoder
            .decode(value)
            .split("\n")
            .filter(l => l);
          for (const line of lines) {
            try {
              response = JSON.parse(line.replace(/^data: /, ""));
              const chunk =
                response.content ?? response.choices?.[0].delta.content;
              if (chunk?.length) {
                // Accumulate chunks for partial and final value
                ret += chunk;
                onStream(ret);
              }
            } catch (ex) {}
          }
        }
      } else {
        response = await request.json();
        ret =
          response.response ??
          response.content ??
          response.choices?.[0].message.content ??
          response.predictions?.[0].content ??
          response.map(r => r.candidates[0].content.parts[0].text).join("");

        // Some wrap JSON responses in code block
        if (expectJSON) {
          ret = ret.replace(/^\s*```\s*(json)?/i, "").replace(/```\s*$/, "");
        }
      }
    } catch (ex) {
      ret = [endpoint, request?.status, ex, JSON.stringify(response)].join(
        "\n\n"
      );
    }

    return ret;
  },
};

¤ Dauer der Verarbeitung: 0.22 Sekunden (vorverarbeitet am 2026-06-10) ¤

Wurzel

Suchen

PVS Prover

Isabelle Prover

NIST Cobol Testsuite

Cephes Mathematical Library

Vienna Development Method

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.