Ziele Untersuchung
mit Columbo Integrität von
Datenbanken Interaktion und
Portierbarkeit Ergonomie der
Schnittstellen

Angebot Produkte Projekt Beratung

Mittel Analytik Modellierung Sprachen Algebra Logik Hardware Denken Kreativität

Zusammenhänge Gesellschaft Wirtschaft Branche Firma


products/sources/formale Sprachen/C/Firefox/toolkit/components/ml/content/ (Browser von der Mozilla Stiftung Version 136.0.1^©) Datei vom 10.2.2025 mit Größe 4 kB

SSL HttpInference.sys.mjs Sprache: unbekannt

/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at https://mozilla.org/MPL/2.0/. */

// Guide models to respond with readable / parseable JSON-ish grammar. Allow
// some whitespace to avoid token resampling while ensuring escaped strings.
//
// What the rules permit:
//   root - exactly one object containing one or more `key` entries
//   key  - a string, a colon, then a value
//   val  - either a string or an array of strings (no numbers, booleans,
//          null or nested objects)
//   arr  - a bracketed list of one or more strings
//   str  - a double-quoted run of at least one character; a backslash may only
//          be followed by `"` or `n`, and unescaped backslashes, quotes and
//          newlines are excluded
//   ws   - a single space or newline
//
// This is a template literal, so the escapes are processed by JavaScript
// before the text is sent: `\\\\` becomes two backslashes, `\\n` becomes a
// backslash followed by `n`, and the `\n` in the `ws` rule becomes a real
// newline character.
// NOTE(review): the syntax looks like llama.cpp's GBNF grammar format; confirm
// against the servers that actually consume the `grammar` request field.
const JSON_GRAMMAR = `root ::= ws? "{" ws* key ( "," ws* key )* ws* "}" ws?
arr ::= "[" ws* str ( "," ws* str )* ws* "]" ws?
key ::= str ":" ws? val
str ::= ["] ( "\\\\" ["n] | [^\\\\"\\n] )+ ["] ws?
val ::= arr | str
ws  ::= [ \n]
`;

/**
 * Handle various kinds of ai/ml http inference APIs.
 */
export const HttpInference = {
  /**
   * GenAI prompt completion
   *
   * The request body format is picked from the endpoint's suffix
   * (`/v1/chat/completions`, `:predict`, `:streamGenerateContent`,
   * `/completion`, or a generic `{ model, prompt }` fallback). Streaming is
   * only attempted for the chat-completions and `/completion` formats; for
   * the others `onStream` is never called.
   *
   * Failures of the request itself (network error, non-200 status, unexpected
   * response shape) do not reject: they are caught and returned as a
   * diagnostic string made of the endpoint, HTTP status, error and last parsed
   * response joined by blank lines.
   *
   * @param {object} config options for the request
   * @param {string} config.endpoint http completion API
   * @param {string} config.prompt to send; may contain `%key%` placeholders
   * @param {string} config.bearer optional token for some endpoints
   * @param {string} config.model optional for some endpoints
   * @param {Function} config.onStream optional callback for streaming
   *   response; called with the text accumulated so far after each chunk
   * @param {object} context optional placeholder values; the first `%key%`
   *   occurrence in the prompt is replaced with the JSON-serialized value
   * @returns {Promise<string>} response of the completion request, or the
   *   diagnostic string described above on failure
   */
  async completion(
    { bearer, endpoint, model, prompt, onStream },
    context = {}
  ) {
    let httpResponse, payload;

    // Try to get JSON response if prompt includes "json". This is decided
    // from the prompt as given, before any context values are substituted.
    const expectJSON = /\bjson\b/i.test(prompt);

    // Conditionally add prompt context if needed and allowed
    let text = prompt;
    for (const [key, val] of Object.entries(context)) {
      const placeholder = `%${key}%`;
      if (text.includes(placeholder)) {
        // A function replacer inserts the value verbatim. A replacement
        // *string* would have "$&", "$1", "$$" etc. inside the value expanded
        // as special patterns, corrupting the prompt.
        text = text.replace(placeholder, () => JSON.stringify(val));
      }
    }

    let streaming = !!onStream;

    // TODO: Pick a body format in a smarter way
    const body = {};
    if (endpoint.endsWith("/v1/chat/completions")) {
      body.messages = [{ content: text, role: "user" }];
      body.max_tokens = 1024;
      body.model = model;
      if (streaming) {
        body.stream = true;
      }
      if (expectJSON) {
        // TODO: Better deciding when to include grammar
        if (endpoint.includes("localhost")) {
          body.grammar = JSON_GRAMMAR;
        }
        body.response_format = { type: "json_object" };
      }
    } else if (endpoint.endsWith(":predict")) {
      body.instances = [{ content: text }];
      body.parameters = { maxOutputTokens: 1024 };
      streaming = false;
    } else if (endpoint.endsWith(":streamGenerateContent")) {
      body.contents = [{ parts: [{ text }], role: "user" }];
      body.generation_config = { maxOutputTokens: 1024 };
      // This endpoint doesn't do server-sent events format
      streaming = false;
    } else if (endpoint.endsWith("/completion")) {
      body.prompt = text;
      if (streaming) {
        body.stream = true;
      }
      if (expectJSON) {
        body.grammar = JSON_GRAMMAR;
      }
    } else {
      body.model = model;
      body.prompt = text;
      streaming = false;
    }

    const headers = {
      "Content-Type": "application/json",
    };
    if (bearer) {
      headers.Authorization = `Bearer ${bearer}`;
    }

    let ret = "";
    try {
      httpResponse = await fetch(endpoint, {
        body: JSON.stringify(body),
        headers,
        method: "POST",
      });

      if (httpResponse.status !== 200) {
        // The raw body text is thrown as-is so it appears unchanged in the
        // diagnostic string built by the catch below.
        throw await httpResponse.text();
      }

      if (streaming) {
        const reader = httpResponse.body.getReader();
        const decoder = new TextDecoder();

        // Read the JSON data of one server-sent event line
        const consumeLine = line => {
          try {
            payload = JSON.parse(line.replace(/^data: /, ""));
            const chunk =
              payload.content ?? payload.choices?.[0].delta.content;
            if (chunk?.length) {
              // Accumulate chunks for partial and final value
              ret += chunk;
              onStream(ret);
            }
          } catch (ex) {
            // Deliberately best-effort: lines that are not JSON events (e.g.
            // a "[DONE]" terminator) are skipped rather than aborting the
            // whole stream.
          }
        };

        // Text received so far that does not yet end in a newline. An event
        // line can straddle two reads, so it is only parsed once complete.
        let pending = "";
        // eslint-disable-next-line no-constant-condition
        while (true) {
          const { done, value } = await reader.read();
          // `stream: true` keeps a multi-byte character split across reads
          // intact; the argument-less call at the end flushes the decoder.
          pending += done
            ? decoder.decode()
            : decoder.decode(value, { stream: true });

          const lines = pending.split("\n");
          // While more data may arrive, the last piece is an unfinished line.
          pending = done ? "" : lines.pop();
          for (const line of lines) {
            if (line) {
              consumeLine(line);
            }
          }

          if (done) {
            break;
          }
        }
      } else {
        payload = await httpResponse.json();
        ret =
          payload.response ??
          payload.content ??
          payload.choices?.[0].message.content ??
          payload.predictions?.[0].content ??
          payload.map(r => r.candidates[0].content.parts[0].text).join("");

        // Some wrap JSON responses in code block
        if (expectJSON) {
          ret = ret.replace(/^\s*```\s*(json)?/i, "").replace(/```\s*$/, "");
        }
      }
    } catch (ex) {
      // Surface the failure to the caller as text instead of rejecting.
      ret = [
        endpoint,
        httpResponse?.status,
        ex,
        JSON.stringify(payload),
      ].join("\n\n");
    }

    return ret;
  },
};