Quellcodebibliothek Statistik Leitseite products/Sources/formale Sprachen/C++/Firefox/dom/media/platforms/apple/   (Browser von der Mozilla Stiftung, Version 136.0.1)  Datei vom 10.2.2025, Größe 38 kB

Quelle  AppleVTEncoder.cpp   Sprache: C++

 
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */


#include "AppleVTEncoder.h"

#include <CoreFoundation/CFArray.h>
#include <CoreFoundation/CFByteOrder.h>
#include <CoreFoundation/CFDictionary.h>
#include <MacTypes.h>

#include "AnnexB.h"
#include "AppleUtils.h"
#include "H264.h"
#include "ImageContainer.h"

#include "mozilla/dom/BindingUtils.h"
#include "mozilla/dom/ImageUtils.h"

namespace mozilla {
extern LazyLogModule sPEMLog;
#define LOGE(fmt, ...)                       \
  MOZ_LOG(sPEMLog, mozilla::LogLevel::Error, \
          ("[AppleVTEncoder] %s: " fmt, __func__, ##__VA_ARGS__))
#define LOGW(fmt, ...)                         \
  MOZ_LOG(sPEMLog, mozilla::LogLevel::Warning, \
          ("[AppleVTEncoder] %s: " fmt, __func__, ##__VA_ARGS__))
#define LOGD(fmt, ...)                       \
  MOZ_LOG(sPEMLog, mozilla::LogLevel::Debug, \
          ("[AppleVTEncoder] %s: " fmt, __func__, ##__VA_ARGS__))
#define LOGV(fmt, ...)                         \
  MOZ_LOG(sPEMLog, mozilla::LogLevel::Verbose, \
          ("[AppleVTEncoder] %s: " fmt, __func__, ##__VA_ARGS__))

static CFDictionaryRef BuildEncoderSpec(const bool aHardwareNotAllowed,
                                        const bool aLowLatencyRateControl) {
  if (__builtin_available(macos 11.3, *)) {
    if (aLowLatencyRateControl) {
      // If doing low-latency rate control, the hardware encoder is required.
      const void* keys[] = {
          kVTVideoEncoderSpecification_RequireHardwareAcceleratedVideoEncoder,
          kVTVideoEncoderSpecification_EnableLowLatencyRateControl};
      const void* values[] = {kCFBooleanTrue, kCFBooleanTrue};

      static_assert(std::size(keys) == std::size(values),
                    "Non matching keys/values array size");
      return CFDictionaryCreate(kCFAllocatorDefault, keys, values,
                                std::size(keys), &kCFTypeDictionaryKeyCallBacks,
                                &kCFTypeDictionaryValueCallBacks);
    }
  }
  const void* keys[] = {
      kVTVideoEncoderSpecification_EnableHardwareAcceleratedVideoEncoder};
  const void* values[] = {aHardwareNotAllowed ? kCFBooleanFalse
                                              : kCFBooleanTrue};

  static_assert(std::size(keys) == std::size(values),
                "Non matching keys/values array size");
  return CFDictionaryCreate(kCFAllocatorDefault, keys, values, std::size(keys),
                            &kCFTypeDictionaryKeyCallBacks,
                            &kCFTypeDictionaryValueCallBacks);
}

// VideoToolbox output callback registered in VTCompressionSessionCreate.
// `aEncoder` is the AppleVTEncoder instance passed as the callback refcon;
// trampolines each encoded sample back into OutputFrame().
static void FrameCallback(void* aEncoder, void* aFrameRefCon, OSStatus aStatus,
                          VTEncodeInfoFlags aInfoFlags,
                          CMSampleBufferRef aSampleBuffer) {
  (static_cast<AppleVTEncoder*>(aEncoder))
      ->OutputFrame(aStatus, aInfoFlags, aSampleBuffer);
}

static bool SetAverageBitrate(VTCompressionSessionRef& aSession,
                              uint32_t aBitsPerSec) {
  int64_t bps(aBitsPerSec);
  AutoCFRelease<CFNumberRef> bitrate(
      CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt64Type, &bps));
  return VTSessionSetProperty(aSession,
                              kVTCompressionPropertyKey_AverageBitRate,
                              bitrate) == noErr;
}

static bool SetConstantBitrate(VTCompressionSessionRef& aSession,
                               uint32_t aBitsPerSec) {
  int32_t bps(aBitsPerSec);
  AutoCFRelease<CFNumberRef> bitrate(
      CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &bps));

  if (__builtin_available(macos 13.0, *)) {
    int rv = VTSessionSetProperty(aSession,
                                  kVTCompressionPropertyKey_ConstantBitRate,
                                  bitrate) == noErr;
    if (rv == kVTPropertyNotSupportedErr) {
      LOGE("Constant bitrate not supported.");
    }
  }
  return false;
}

static bool SetBitrateAndMode(VTCompressionSessionRef& aSession,
                              BitrateMode aBitrateMode, uint32_t aBitsPerSec) {
  if (aBitrateMode == BitrateMode::Variable) {
    return SetAverageBitrate(aSession, aBitsPerSec);
  }
  return SetConstantBitrate(aSession, aBitsPerSec);
}

static bool SetFrameRate(VTCompressionSessionRef& aSession, int64_t aFPS) {
  AutoCFRelease<CFNumberRef> framerate(
      CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt64Type, &aFPS));
  return VTSessionSetProperty(aSession,
                              kVTCompressionPropertyKey_ExpectedFrameRate,
                              framerate) == noErr;
}

static bool SetRealtime(VTCompressionSessionRef& aSession, bool aEnabled) {
  // B-frames has been disabled in Init(), so no need to set it here.

  CFBooleanRef enabled = aEnabled ? kCFBooleanTrue : kCFBooleanFalse;
  OSStatus status = VTSessionSetProperty(
      aSession, kVTCompressionPropertyKey_RealTime, enabled);
  LOGD("%s real time, status: %d", aEnabled ? "Enable" : "Disable", status);
  if (status != noErr) {
    return false;
  }

  if (__builtin_available(macos 11.0, *)) {
    status = VTSessionSetProperty(
        aSession, kVTCompressionPropertyKey_PrioritizeEncodingSpeedOverQuality,
        enabled);
    LOGD("%s PrioritizeEncodingSpeedOverQuality, status: %d",
         aEnabled ? "Enable" : "Disable", status);
    if (status != noErr && status != kVTPropertyNotSupportedErr) {
      return false;
    }
  }

  int32_t maxFrameDelayCount = aEnabled ? 0 : kVTUnlimitedFrameDelayCount;
  AutoCFRelease<CFNumberRef> cf(CFNumberCreate(
      kCFAllocatorDefault, kCFNumberSInt32Type, &maxFrameDelayCount));
  status = VTSessionSetProperty(
      aSession, kVTCompressionPropertyKey_MaxFrameDelayCount, cf);
  LOGD("Set max frame delay count to %d, status: %d", maxFrameDelayCount,
       status);
  if (status != noErr && status != kVTPropertyNotSupportedErr) {
    return false;
  }

  return true;
}

static bool SetProfileLevel(VTCompressionSessionRef& aSession,
                            H264_PROFILE aValue) {
  CFStringRef profileLevel = nullptr;
  switch (aValue) {
    case H264_PROFILE::H264_PROFILE_BASE:
      profileLevel = kVTProfileLevel_H264_Baseline_AutoLevel;
      break;
    case H264_PROFILE::H264_PROFILE_MAIN:
      profileLevel = kVTProfileLevel_H264_Main_AutoLevel;
      break;
    case H264_PROFILE::H264_PROFILE_HIGH:
      profileLevel = kVTProfileLevel_H264_High_AutoLevel;
      break;
    default:
      LOGE("Profile %d not handled"static_cast<int>(aValue));
  }

  return profileLevel ? VTSessionSetProperty(
                            aSession, kVTCompressionPropertyKey_ProfileLevel,
                            profileLevel) == noErr
                      : false;
}

// Creates and configures the VTCompressionSession from mConfig: builds the
// encoder spec and source-buffer attributes, creates the session, disables
// frame reordering (B-frames), then applies realtime/bitrate/SVC/keyframe/
// profile settings in order. Any failure rejects the InitPromise.
RefPtr<MediaDataEncoder::InitPromise> AppleVTEncoder::Init() {
  MOZ_ASSERT(!mInited, "Cannot initialize encoder again without shutting down");

  // Reject impossible dimensions up front.
  if (mConfig.mSize.width == 0 || mConfig.mSize.height == 0) {
    LOGE("width or height 0 in encoder init");
    return InitPromise::CreateAndReject(NS_ERROR_ILLEGAL_VALUE, __func__);
  }

  // SVC relies on a low-latency rate-control API only present on 11.3+.
  if (mConfig.mScalabilityMode != ScalabilityMode::None && !OSSupportsSVC()) {
    LOGE("SVC only supported on macOS 11.3 and more recent");
    return InitPromise::CreateAndReject(
        MediaResult(NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR,
                    "SVC only supported on macOS 11.3 and more recent"),
        __func__);
  }

  // Realtime usage or any SVC mode requires the low-latency encoder path.
  bool lowLatencyRateControl =
      mConfig.mUsage == Usage::Realtime ||
      mConfig.mScalabilityMode != ScalabilityMode::None;
  LOGD("low latency rate control: %s, Hardware allowed: %s",
       lowLatencyRateControl ? "yes" : "no",
       mHardwareNotAllowed ? "no" : "yes");
  AutoCFRelease<CFDictionaryRef> spec(
      BuildEncoderSpec(mHardwareNotAllowed, lowLatencyRateControl));
  AutoCFRelease<CFDictionaryRef> srcBufferAttr(
      BuildSourceImageBufferAttributes());
  if (!srcBufferAttr) {
    LOGE("Failed to create source buffer attr");
    return InitPromise::CreateAndReject(
        MediaResult(NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR,
                    "fail to create source buffer attributes"),
        __func__);
  }

  // FrameCallback receives encoded output with `this` as its refcon.
  OSStatus status = VTCompressionSessionCreate(
      kCFAllocatorDefault, mConfig.mSize.width, mConfig.mSize.height,
      kCMVideoCodecType_H264, spec, srcBufferAttr, kCFAllocatorDefault,
      &FrameCallback, this /* outputCallbackRefCon */, &mSession);

  if (status != noErr) {
    LOGE("Failed to create compression session");
    return InitPromise::CreateAndReject(
        MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                    "fail to create encoder session"),
        __func__);
  }

  // Disable B-frames: output order must match presentation order.
  if (VTSessionSetProperty(mSession,
                           kVTCompressionPropertyKey_AllowFrameReordering,
                           kCFBooleanFalse) != noErr) {
    LOGE("Couldn't disable bframes");
    return InitPromise::CreateAndReject(
        MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, "Couldn't disable bframes"),
        __func__);
  }

  if (mConfig.mUsage == Usage::Realtime && !SetRealtime(mSession, true)) {
    LOGE("fail to configure realtime properties");
    return InitPromise::CreateAndReject(
        MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                    "fail to configure real-time"),
        __func__);
  }

  // A bitrate of 0 means "let the encoder pick"; only configure when set.
  if (mConfig.mBitrate) {
    if (mConfig.mCodec == CodecType::H264 &&
        mConfig.mBitrateMode == BitrateMode::Constant) {
      // Not supported, fall-back to VBR.
      LOGD("H264 CBR not supported in VideoToolbox, falling back to VBR");
      mConfig.mBitrateMode = BitrateMode::Variable;
    }
    bool rv =
        SetBitrateAndMode(mSession, mConfig.mBitrateMode, mConfig.mBitrate);
    if (!rv) {
      LOGE("failed to set bitrate to %d and mode to %s", mConfig.mBitrate,
           mConfig.mBitrateMode == BitrateMode::Constant ? "constant"
                                                         : "variable");
      return InitPromise::CreateAndReject(
          MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                      "fail to configurate bitrate"),
          __func__);
    }
  }

  // SVC: express temporal layering as the fraction of frames that belong
  // to the base layer (L1T2 -> every other frame).
  if (mConfig.mScalabilityMode != ScalabilityMode::None) {
    if (__builtin_available(macos 11.3, *)) {
      float baseLayerFPSRatio = 1.0f;
      switch (mConfig.mScalabilityMode) {
        case ScalabilityMode::L1T2:
          baseLayerFPSRatio = 0.5;
          break;
        case ScalabilityMode::L1T3:
          // Not supported in hw on macOS, but is accepted and errors out when
          // encoding. Reject the configuration now.
          LOGE("macOS only supports L1T2 h264 SVC");
          return InitPromise::CreateAndReject(
              MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                          nsPrintfCString("macOS only support L1T2 h264 SVC")),
              __func__);
        default:
          MOZ_ASSERT_UNREACHABLE("Unhandled value");
      }
      AutoCFRelease<CFNumberRef> cf(CFNumberCreate(
          kCFAllocatorDefault, kCFNumberFloatType, &baseLayerFPSRatio));
      // Non-zero OSStatus indicates failure here.
      if (VTSessionSetProperty(
              mSession, kVTCompressionPropertyKey_BaseLayerFrameRateFraction,
              cf)) {
        LOGE("Failed to set base layer framerate fraction to %f",
             baseLayerFPSRatio);
        return InitPromise::CreateAndReject(
            MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                        nsPrintfCString("fail to configure SVC (base ratio: %f",
                                        baseLayerFPSRatio)),
            __func__);
      }
    } else {
      LOGE("MacOS version too old to enable SVC");
      return InitPromise::CreateAndReject(
          MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                      "macOS version too old to enable SVC"),
          __func__);
    }
  }

  // Clamp the keyframe interval into int64 range before handing it to CF.
  int64_t interval =
      mConfig.mKeyframeInterval > std::numeric_limits<int64_t>::max()
          ? std::numeric_limits<int64_t>::max()
          : AssertedCast<int64_t>(mConfig.mKeyframeInterval);
  AutoCFRelease<CFNumberRef> cf(
      CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt64Type, &interval));
  if (VTSessionSetProperty(mSession,
                           kVTCompressionPropertyKey_MaxKeyFrameInterval,
                           cf) != noErr) {
    LOGE("Failed to set max keyframe interval");
    return InitPromise::CreateAndReject(
        MediaResult(
            NS_ERROR_DOM_MEDIA_FATAL_ERR,
            nsPrintfCString("fail to configurate keyframe interval:%" PRId64,
                            interval)),
        __func__);
  }

  if (mConfig.mCodecSpecific) {
    const H264Specific& specific = mConfig.mCodecSpecific->as<H264Specific>();
    if (!SetProfileLevel(mSession, specific.mProfile)) {
      LOGE("Failed to set profile level");
      return InitPromise::CreateAndReject(
          MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                      nsPrintfCString("fail to configurate profile level:%d",
                                      int(specific.mProfile))),
          __func__);
    }
  }

  // Record (informationally) whether the hardware encoder was picked.
  AutoCFRelease<CFBooleanRef> isUsingHW = nullptr;
  status = VTSessionCopyProperty(
      mSession, kVTCompressionPropertyKey_UsingHardwareAcceleratedVideoEncoder,
      kCFAllocatorDefault, isUsingHW.receive());
  mIsHardwareAccelerated = status == noErr && isUsingHW == kCFBooleanTrue;
  LOGD("Using hw acceleration: %s", mIsHardwareAccelerated ? "yes" : "no");

  mError = NS_OK;
  return InitPromise::CreateAndResolve(true, __func__);
}

// Maps a DOM ImageBitmapFormat onto the corresponding CoreVideo pixel
// format constant, or Nothing() for formats this encoder cannot ingest.
static Maybe<OSType> MapPixelFormat(dom::ImageBitmapFormat aFormat) {
  switch (aFormat) {
    case dom::ImageBitmapFormat::RGBA32:
      return Some(kCVPixelFormatType_32RGBA);
    case dom::ImageBitmapFormat::BGRA32:
      return Some(kCVPixelFormatType_32BGRA);
    case dom::ImageBitmapFormat::RGB24:
      return Some(kCVPixelFormatType_24RGB);
    case dom::ImageBitmapFormat::BGR24:
      return Some(kCVPixelFormatType_24BGR);
    case dom::ImageBitmapFormat::GRAY8:
      return Some(kCVPixelFormatType_OneComponent8);
    case dom::ImageBitmapFormat::YUV444P:
      return Some(kCVPixelFormatType_444YpCbCr8);
    case dom::ImageBitmapFormat::YUV420P:
      return Some(kCVPixelFormatType_420YpCbCr8PlanarFullRange);
    case dom::ImageBitmapFormat::YUV420SP_NV12:
      return Some(kCVPixelFormatType_420YpCbCr8BiPlanarFullRange);
    default:
      return Nothing();
  }
}

CFDictionaryRef AppleVTEncoder::BuildSourceImageBufferAttributes() {
  Maybe<OSType> fmt = MapPixelFormat(mConfig.mSourcePixelFormat);
  if (fmt.isNothing()) {
    LOGE("unsupported source pixel format");
    return nullptr;
  }

  // Source image buffer attributes
  const void* keys[] = {kCVPixelBufferOpenGLCompatibilityKey,  // TODO
                        kCVPixelBufferIOSurfacePropertiesKey,  // TODO
                        kCVPixelBufferPixelFormatTypeKey};

  AutoCFRelease<CFDictionaryRef> ioSurfaceProps(CFDictionaryCreate(
      kCFAllocatorDefault, nullptr, nullptr, 0, &kCFTypeDictionaryKeyCallBacks,
      &kCFTypeDictionaryValueCallBacks));
  AutoCFRelease<CFNumberRef> pixelFormat(
      CFNumberCreate(kCFAllocatorDefault, kCFNumberSInt32Type, &fmt));
  const void* values[] = {kCFBooleanTrue, ioSurfaceProps, pixelFormat};

  MOZ_ASSERT(std::size(keys) == std::size(values),
             "Non matching keys/values array size");

  return CFDictionaryCreate(kCFAllocatorDefault, keys, values, std::size(keys),
                            &kCFTypeDictionaryKeyCallBacks,
                            &kCFTypeDictionaryValueCallBacks);
}

static bool IsKeyframe(CMSampleBufferRef aSample) {
  CFArrayRef attachments = CMSampleBufferGetSampleAttachmentsArray(aSample, 0);
  if (attachments == nullptr || CFArrayGetCount(attachments) == 0) {
    return false;
  }

  return !CFDictionaryContainsKey(
      static_cast<CFDictionaryRef>(CFArrayGetValueAtIndex(attachments, 0)),
      kCMSampleAttachmentKey_NotSync);
}

static size_t GetNumParamSets(CMFormatDescriptionRef aDescription) {
  size_t numParamSets = 0;
  OSStatus status = CMVideoFormatDescriptionGetH264ParameterSetAtIndex(
      aDescription, 0, nullptr, nullptr, &numParamSets, nullptr);
  if (status != noErr) {
    LOGE("Cannot get number of parameter sets from format description");
  }

  return numParamSets;
}

static const uint8_t kNALUStart[4] = {0, 0, 0, 1};

static size_t GetParamSet(CMFormatDescriptionRef aDescription, size_t aIndex,
                          const uint8_t** aDataPtr) {
  size_t length = 0;
  int headerSize = 0;
  if (CMVideoFormatDescriptionGetH264ParameterSetAtIndex(
          aDescription, aIndex, aDataPtr, &length, nullptr, &headerSize) !=
      noErr) {
    LOGE("failed to get parameter set from format description");
    return 0;
  }
  MOZ_ASSERT(headerSize == sizeof(kNALUStart), "Only support 4 byte header");

  return length;
}

static bool WriteSPSPPS(MediaRawData* aDst,
                        CMFormatDescriptionRef aDescription) {
  // Get SPS/PPS
  const size_t numParamSets = GetNumParamSets(aDescription);
  UniquePtr<MediaRawDataWriter> writer(aDst->CreateWriter());
  for (size_t i = 0; i < numParamSets; i++) {
    const uint8_t* data = nullptr;
    size_t length = GetParamSet(aDescription, i, &data);
    if (length == 0) {
      return false;
    }
    if (!writer->Append(kNALUStart, sizeof(kNALUStart))) {
      LOGE("Cannot write NAL unit start code");
      return false;
    }
    if (!writer->Append(data, length)) {
      LOGE("Cannot write parameter set");
      return false;
    }
  }
  return true;
}

static RefPtr<MediaByteBuffer> extractAvcc(
    CMFormatDescriptionRef aDescription) {
  CFPropertyListRef list = CMFormatDescriptionGetExtension(
      aDescription,
      kCMFormatDescriptionExtension_SampleDescriptionExtensionAtoms);
  if (!list) {
    LOGE("fail to get atoms");
    return nullptr;
  }
  CFDataRef avcC = static_cast<CFDataRef>(
      CFDictionaryGetValue(static_cast<CFDictionaryRef>(list), CFSTR("avcC")));
  if (!avcC) {
    LOGE("fail to extract avcC");
    return nullptr;
  }
  CFIndex length = CFDataGetLength(avcC);
  const UInt8* bytes = CFDataGetBytePtr(avcC);
  if (length <= 0 || !bytes) {
    LOGE("empty avcC");
    return nullptr;
  }

  RefPtr<MediaByteBuffer> config = new MediaByteBuffer(length);
  config->AppendElements(bytes, length);
  return config;
}

bool AppleVTEncoder::WriteExtraData(MediaRawData* aDst, CMSampleBufferRef aSrc,
                                    const bool aAsAnnexB) {
  if (!IsKeyframe(aSrc)) {
    return true;
  }

  aDst->mKeyframe = true;
  CMFormatDescriptionRef desc = CMSampleBufferGetFormatDescription(aSrc);
  if (!desc) {
    LOGE("fail to get format description from sample");
    return false;
  }

  if (aAsAnnexB) {
    return WriteSPSPPS(aDst, desc);
  }

  RefPtr<MediaByteBuffer> avcc = extractAvcc(desc);
  if (!avcc) {
    return false;
  }

  if (!mAvcc || !H264::CompareExtraData(avcc, mAvcc)) {
    mAvcc = avcc;
    aDst->mExtraData = mAvcc;
  }

  return avcc != nullptr;
}

// Copies every NAL unit of the sample into aDst, appended after whatever
// aDst already holds (e.g. SPS/PPS written by WriteExtraData). The source
// stores NALUs as [4-byte big-endian size][payload]; in Annex B mode the
// size prefix is replaced by the 4-byte start code, otherwise the length-
// prefixed form is copied through unchanged.
static bool WriteNALUs(MediaRawData* aDst, CMSampleBufferRef aSrc,
                       bool aAsAnnexB = false) {
  size_t srcRemaining = CMSampleBufferGetTotalSampleSize(aSrc);
  CMBlockBufferRef block = CMSampleBufferGetDataBuffer(aSrc);
  if (!block) {
    LOGE("Cannot get block buffer frome sample");
    return false;
  }
  UniquePtr<MediaRawDataWriter> writer(aDst->CreateWriter());
  // Start writing after the existing contents of aDst.
  size_t writtenLength = aDst->Size();
  // Ensure capacity.
  if (!writer->SetSize(writtenLength + srcRemaining)) {
    LOGE("Cannot allocate buffer");
    return false;
  }
  size_t readLength = 0;
  while (srcRemaining > 0) {
    // Extract the size of next NAL unit
    uint8_t unitSizeBytes[4];
    MOZ_ASSERT(srcRemaining > sizeof(unitSizeBytes));
    if (CMBlockBufferCopyDataBytes(block, readLength, sizeof(unitSizeBytes),
                                   reinterpret_cast<uint32_t*>(
                                       unitSizeBytes)) != kCMBlockBufferNoErr) {
      LOGE("Cannot copy unit size bytes");
      return false;
    }
    // The size prefix is big-endian on the wire.
    size_t unitSize =
        CFSwapInt32BigToHost(*reinterpret_cast<uint32_t*>(unitSizeBytes));

    if (aAsAnnexB) {
      // Replace unit size bytes with NALU start code.
      PodCopy(writer->Data() + writtenLength, kNALUStart, sizeof(kNALUStart));
      // Skip the 4 size bytes in the source; they were "consumed" by the
      // start code just written (same length, so totals still line up).
      readLength += sizeof(unitSizeBytes);
      srcRemaining -= sizeof(unitSizeBytes);
      writtenLength += sizeof(kNALUStart);
    } else {
      // Copy unit size bytes + data.
      unitSize += sizeof(unitSizeBytes);
    }
    MOZ_ASSERT(writtenLength + unitSize <= aDst->Size());
    // Copy NAL unit data
    if (CMBlockBufferCopyDataBytes(block, readLength, unitSize,
                                   writer->Data() + writtenLength) !=
        kCMBlockBufferNoErr) {
      LOGE("Cannot copy unit data");
      return false;
    }
    readLength += unitSize;
    srcRemaining -= unitSize;
    writtenLength += unitSize;
  }
  // Every byte reserved above must have been written exactly once.
  MOZ_ASSERT(writtenLength == aDst->Size());
  return true;
}

// VideoToolbox output handler (invoked via FrameCallback, possibly on a
// VT-internal thread). Converts the CMSampleBuffer into a MediaRawData
// (extradata + NALUs, Annex B or AVCC) and forwards it to ProcessOutput;
// errors and dropped/empty frames are forwarded with a null output.
void AppleVTEncoder::OutputFrame(OSStatus aStatus, VTEncodeInfoFlags aFlags,
                                 CMSampleBufferRef aBuffer) {
  LOGV("status: %d, flags: %d, buffer %p", aStatus, aFlags, aBuffer);

  if (aStatus != noErr) {
    ProcessOutput(nullptr, EncodeResult::EncodeError);
    return;
  }

  if (aFlags & kVTEncodeInfo_FrameDropped) {
    ProcessOutput(nullptr, EncodeResult::FrameDropped);
    return;
  }

  if (!aBuffer) {
    ProcessOutput(nullptr, EncodeResult::EmptyBuffer);
    return;
  }

  RefPtr<MediaRawData> output(new MediaRawData());

  // SVC: derive the temporal layer id from the sample attachments — a
  // frame other frames depend on belongs to the base layer (id 0).
  if (__builtin_available(macos 11.3, *)) {
    if (mConfig.mScalabilityMode != ScalabilityMode::None) {
      CFDictionaryRef dict = (CFDictionaryRef)(CFArrayGetValueAtIndex(
          CMSampleBufferGetSampleAttachmentsArray(aBuffer, true), 0));
      CFBooleanRef isBaseLayerRef = (CFBooleanRef)CFDictionaryGetValue(
          dict, (const void*)kCMSampleAttachmentKey_IsDependedOnByOthers);
      Boolean isBaseLayer = CFBooleanGetValue(isBaseLayerRef);
      output->mTemporalLayerId.emplace(isBaseLayer ? 0 : 1);
    }
  }

  // NOTE(review): mCodecSpecific is dereferenced without an isSome()
  // check (Init() guards the same access with `if (mConfig.mCodecSpecific)`)
  // — confirm it is always engaged by the time frames are produced.
  bool forceAvcc = false;
  if (mConfig.mCodecSpecific->is<H264Specific>()) {
    forceAvcc = mConfig.mCodecSpecific->as<H264Specific>().mFormat ==
                H264BitStreamFormat::AVC;
  }
  bool asAnnexB = !forceAvcc;
  bool succeeded = WriteExtraData(output, aBuffer, asAnnexB) &&
                   WriteNALUs(output, aBuffer, asAnnexB);

  // Timestamps come back from CoreMedia in seconds.
  output->mTime = media::TimeUnit::FromSeconds(
      CMTimeGetSeconds(CMSampleBufferGetPresentationTimeStamp(aBuffer)));
  output->mDuration = media::TimeUnit::FromSeconds(
      CMTimeGetSeconds(CMSampleBufferGetOutputDuration(aBuffer)));
  LOGV("Make a %s output[time: %s, duration: %s]: %s",
       asAnnexB ? "AnnexB" : "AVCC", output->mTime.ToString().get(),
       output->mDuration.ToString().get(), succeeded ? "succeed" : "failed");
  ProcessOutput(succeeded ? std::move(output) : nullptr, EncodeResult::Success);
}

void AppleVTEncoder::ProcessOutput(RefPtr<MediaRawData>&& aOutput,
                                   EncodeResult aResult) {
  if (!mTaskQueue->IsCurrentThreadIn()) {
    LOGV("Dispatch ProcessOutput to task queue");
    nsresult rv = mTaskQueue->Dispatch(
        NewRunnableMethod<RefPtr<MediaRawData>, EncodeResult>(
            "AppleVTEncoder::ProcessOutput"this,
            &AppleVTEncoder::ProcessOutput, std::move(aOutput), aResult));
    MOZ_DIAGNOSTIC_ASSERT(NS_SUCCEEDED(rv));
    Unused << rv;
    return;
  }

  if (aResult != EncodeResult::Success) {
    switch (aResult) {
      case EncodeResult::EncodeError:
        mError = MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, "Failed to encode");
        break;
      case EncodeResult::EmptyBuffer:
        mError = MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, "Buffer is empty");
        break;
      case EncodeResult::FrameDropped:
        if (mConfig.mUsage == Usage::Realtime) {
          // Dropping a frame in real-time usage is okay.
          LOGW("Frame is dropped");
        } else {
          // Some usages like transcoding should not drop a frame.
          LOGE("Frame is dropped");
          mError =
              MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, "Frame is dropped");
        }
        break;
      default:
        MOZ_ASSERT_UNREACHABLE("Unknown EncodeResult");
        break;
    }
    MaybeResolveOrRejectEncodePromise();
    return;
  }

  LOGV("Got %zu bytes of output", !aOutput.get() ? 0 : aOutput->Size());

  if (!aOutput) {
    mError = MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR, "No converted output");
    MaybeResolveOrRejectEncodePromise();
    return;
  }

  mEncodedData.AppendElement(std::move(aOutput));
  MaybeResolveOrRejectEncodePromise();
}

// Public encode entry point: hops to the task queue, parks the returned
// promise in mEncodePromise, and starts the actual work in ProcessEncode.
RefPtr<MediaDataEncoder::EncodePromise> AppleVTEncoder::Encode(
    const MediaData* aSample) {
  MOZ_ASSERT(aSample != nullptr);

  RefPtr<const VideoData> sample(aSample->As<const VideoData>());

  // `self` keeps the encoder alive until the lambda has run.
  RefPtr<AppleVTEncoder> self = this;
  return InvokeAsync(mTaskQueue, __func__, [self, this, sample] {
    MOZ_ASSERT(mEncodePromise.IsEmpty(),
               "Encode should not be called again before getting results");
    RefPtr<EncodePromise> p = mEncodePromise.Ensure(__func__);
    ProcessEncode(sample);
    return p;
  });
}

// Public reconfigure entry point: forwards the change list to
// ProcessReconfigure on the task queue.
RefPtr<MediaDataEncoder::ReconfigurationPromise> AppleVTEncoder::Reconfigure(
    const RefPtr<const EncoderConfigurationChangeList>& aConfigurationChanges) {
  return InvokeAsync(mTaskQueue, this, __func__,
                     &AppleVTEncoder::ProcessReconfigure,
                     aConfigurationChanges);
}

// Task-queue side of Encode(): wraps the sample's image in a CVPixelBuffer
// and submits it to the compression session. For non-realtime usage the
// promise is resolved immediately (output is collected later); realtime
// usage waits for output to minimize latency.
void AppleVTEncoder::ProcessEncode(const RefPtr<const VideoData>& aSample) {
  LOGV("::ProcessEncode");
  AssertOnTaskQueue();
  MOZ_ASSERT(mSession);

  // A previously recorded error rejects the pending promise.
  // NOTE(review): execution continues past this point even when mError is
  // set — confirm whether an early return is intended here.
  if (NS_FAILED(mError)) {
    LOGE("Pending error: %s", mError.Description().get());
    MaybeResolveOrRejectEncodePromise();
  }

  AutoCVBufferRelease<CVImageBufferRef> buffer(
      CreateCVPixelBuffer(aSample->mImage));
  if (!buffer) {
    LOGE("Failed to allocate buffer");
    mError = MediaResult(NS_ERROR_OUT_OF_MEMORY, "failed to allocate buffer");
    MaybeResolveOrRejectEncodePromise();
    return;
  }

  // Force a keyframe when the caller marked the sample as one.
  CFDictionaryRef frameProps = nullptr;
  if (aSample->mKeyframe) {
    CFTypeRef keys[] = {kVTEncodeFrameOptionKey_ForceKeyFrame};
    CFTypeRef values[] = {kCFBooleanTrue};
    MOZ_ASSERT(std::size(keys) == std::size(values));
    frameProps = CFDictionaryCreate(
        kCFAllocatorDefault, keys, values, std::size(keys),
        &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
  };

  VTEncodeInfoFlags info;
  OSStatus status = VTCompressionSessionEncodeFrame(
      mSession, buffer,
      CMTimeMake(aSample->mTime.ToMicroseconds(), USECS_PER_S),
      CMTimeMake(aSample->mDuration.ToMicroseconds(), USECS_PER_S), frameProps,
      nullptr /* sourceFrameRefcon */, &info);
  if (status != noErr) {
    LOGE("VTCompressionSessionEncodeFrame error: %d", status);
    mError = MediaResult(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                         "VTCompressionSessionEncodeFrame error");
    MaybeResolveOrRejectEncodePromise();
    return;
  }

  if (mConfig.mUsage != Usage::Realtime) {
    MaybeResolveOrRejectEncodePromise();
    return;
  }

  // The latency between encoding a sample and receiving the encoded output is
  // critical in real-time usage. To minimize the latency, the output result
  // should be returned immediately once they are ready, instead of being
  // returned in the next or later Encode() iterations.
  LOGV("Encoding in progress");

  // Workaround for real-time encoding in OS versions < 11.
  ForceOutputIfNeeded();
}

RefPtr<MediaDataEncoder::ReconfigurationPromise>
AppleVTEncoder::ProcessReconfigure(
    const RefPtr<const EncoderConfigurationChangeList>& aConfigurationChanges) {
  bool ok = false;
  for (const auto& confChange : aConfigurationChanges->mChanges) {
    // A reconfiguration on the fly succeeds if all changes can be applied
    // successfuly. In case of failure, the encoder will be drained and
    // recreated.
    ok &= confChange.match(
        // Not supported yet
        [&](const DimensionsChange& aChange) -> bool { return false; },
        [&](const DisplayDimensionsChange& aChange) -> bool { return false; },
        [&](const BitrateModeChange& aChange) -> bool {
          mConfig.mBitrateMode = aChange.get();
          return SetBitrateAndMode(mSession, mConfig.mBitrateMode,
                                   mConfig.mBitrate);
        },
        [&](const BitrateChange& aChange) -> bool {
          mConfig.mBitrate = aChange.get().refOr(0);
          // 0 is the default in AppleVTEncoder: the encoder chooses the bitrate
          // based on the content.
          return SetBitrateAndMode(mSession, mConfig.mBitrateMode,
                                   mConfig.mBitrate);
        },
        [&](const FramerateChange& aChange) -> bool {
          // 0 means default, in VideoToolbox, and is valid, perform some light
          // sanitation on other values.
          double fps = aChange.get().refOr(0);
          if (std::isnan(fps) || fps < 0 ||
              int64_t(fps) > std::numeric_limits<int32_t>::max()) {
            LOGE("Invalid fps of %lf", fps);
            return false;
          }
          return SetFrameRate(mSession, AssertedCast<int64_t>(fps));
        },
        [&](const UsageChange& aChange) -> bool {
          mConfig.mUsage = aChange.get();
          return SetRealtime(mSession, aChange.get() == Usage::Realtime);
        },
        [&](const ContentHintChange& aChange) -> bool { return false; },
        [&](const SampleRateChange& aChange) -> bool { return false; },
        [&](const NumberOfChannelsChange& aChange) -> bool { return false; });
  };
  using P = MediaDataEncoder::ReconfigurationPromise;
  if (ok) {
    return P::CreateAndResolve(true, __func__);
  }
  return P::CreateAndReject(NS_ERROR_DOM_MEDIA_FATAL_ERR, __func__);
}

// Number of CVPixelBuffer planes used by each supported source format:
// packed RGB/gray formats are single-plane, planar YUV is 3, NV12 is 2.
// Returns 0 (with a log entry) for unsupported formats.
static size_t NumberOfPlanes(dom::ImageBitmapFormat aPixelFormat) {
  switch (aPixelFormat) {
    case dom::ImageBitmapFormat::RGBA32:
    case dom::ImageBitmapFormat::BGRA32:
    case dom::ImageBitmapFormat::RGB24:
    case dom::ImageBitmapFormat::BGR24:
    case dom::ImageBitmapFormat::GRAY8:
      return 1;
    case dom::ImageBitmapFormat::YUV444P:
    case dom::ImageBitmapFormat::YUV420P:
      return 3;
    case dom::ImageBitmapFormat::YUV420SP_NV12:
      return 2;
    default:
      LOGE("Unsupported input pixel format");
      return 0;
  }
}

using namespace layers;

// CVPixelBuffer release callback for surface-backed buffers: drops the
// reference that was taken (without AddRef) when the buffer was created.
static void ReleaseSurface(void* aReleaseRef, const void* aBaseAddress) {
  RefPtr<gfx::DataSourceSurface> released =
      dont_AddRef(static_cast<gfx::DataSourceSurface*>(aReleaseRef));
}

// CVPixelBuffer release callback for planar YCbCr buffers: balances the
// explicit AddRef() taken in CreateCVPixelBuffer before handing the
// image's planes to CVPixelBufferCreateWithPlanarBytes.
static void ReleaseImage(void* aImageGrip, const void* aDataPtr,
                         size_t aDataSize, size_t aNumOfPlanes,
                         const void** aPlanes) {
  (static_cast<PlanarYCbCrImage*>(aImageGrip))->Release();
}

CVPixelBufferRef AppleVTEncoder::CreateCVPixelBuffer(Image* aSource) {
  AssertOnTaskQueue();

  if (aSource->GetFormat() == ImageFormat::PLANAR_YCBCR) {
    PlanarYCbCrImage* image = aSource->AsPlanarYCbCrImage();
    if (!image || !image->GetData()) {
      return nullptr;
    }

    OSType format = MapPixelFormat(mConfig.mSourcePixelFormat).ref();
    size_t numPlanes = NumberOfPlanes(mConfig.mSourcePixelFormat);
    const PlanarYCbCrImage::Data* yuv = image->GetData();
    if (!yuv) {
      return nullptr;
    }
    auto ySize = yuv->YDataSize();
    auto cbcrSize = yuv->CbCrDataSize();
    void* addresses[3] = {};
    size_t widths[3] = {};
    size_t heights[3] = {};
    size_t strides[3] = {};
    switch (numPlanes) {
      case 3:
        addresses[2] = yuv->mCrChannel;
        widths[2] = cbcrSize.width;
        heights[2] = cbcrSize.height;
        strides[2] = yuv->mCbCrStride;
        [[fallthrough]];
      case 2:
        addresses[1] = yuv->mCbChannel;
        widths[1] = cbcrSize.width;
        heights[1] = cbcrSize.height;
        strides[1] = yuv->mCbCrStride;
        [[fallthrough]];
      case 1:
        addresses[0] = yuv->mYChannel;
        widths[0] = ySize.width;
        heights[0] = ySize.height;
        strides[0] = yuv->mYStride;
        break;
      default:
        return nullptr;
    }

    CVPixelBufferRef buffer = nullptr;
    image->AddRef();  // Grip input buffers.
    CVReturn rv = CVPixelBufferCreateWithPlanarBytes(
        kCFAllocatorDefault, yuv->mPictureRect.width, yuv->mPictureRect.height,
        format, nullptr /* dataPtr */, 0 /* dataSize */, numPlanes, addresses,
        widths, heights, strides, ReleaseImage /* releaseCallback */,
        image /* releaseRefCon */, nullptr /* pixelBufferAttributes */,
        &buffer);
    if (rv == kCVReturnSuccess) {
      return buffer;
      // |image| will be released in |ReleaseImage()|.
    }
    LOGE("CVPIxelBufferCreateWithPlanarBytes error");
    image->Release();
    return nullptr;
  }

  RefPtr<gfx::SourceSurface> surface = aSource->GetAsSourceSurface();
  if (!surface) {
    LOGE("Failed to get SourceSurface");
    return nullptr;
  }

  RefPtr<gfx::DataSourceSurface> dataSurface = surface->GetDataSurface();
  if (!dataSurface) {
    LOGE("Failed to get DataSurface");
    return nullptr;
  }

  gfx::DataSourceSurface::ScopedMap map(dataSurface,
                                        gfx::DataSourceSurface::READ);
  if (NS_WARN_IF(!map.IsMapped())) {
    LOGE("Failed to map DataSurface");
    return nullptr;
  }

  const dom::ImageUtils imageUtils(aSource);
  Maybe<dom::ImageBitmapFormat> format = imageUtils.GetFormat();
  if (format.isNothing()) {
    LOGE("Image conversion not implemented in AppleVTEncoder");
    return nullptr;
  }

  if (format.ref() != mConfig.mSourcePixelFormat) {
    LOGV("Encode image in %s format, even though config's source format is %s",
         dom::GetEnumString(format.ref()).get(),
         dom::GetEnumString(mConfig.mSourcePixelFormat).get());
  }

  Maybe<OSType> imgFormat = MapPixelFormat(format.ref());
  if (imgFormat.isNothing()) {
    LOGE("Failed to get kCVPixelFormatType");
    return nullptr;
  }

  CVPixelBufferRef buffer = nullptr;
  gfx::DataSourceSurface* dss = dataSurface.forget().take();
  CVReturn rv = CVPixelBufferCreateWithBytes(
      kCFAllocatorDefault, dss->GetSize().Width(), dss->GetSize().Height(),
      imgFormat.value(), map.GetData(), map.GetStride(), ReleaseSurface, dss,
      nullptr, &buffer);
  if (rv == kCVReturnSuccess) {
    return buffer;
    // |dss| will be released in |ReleaseSurface()|.
  }
  LOGE("CVPIxelBufferCreateWithBytes error: %d", rv);
  RefPtr<gfx::DataSourceSurface> released = dont_AddRef(dss);
  return nullptr;
}

// Public drain entry point: hops to the task queue, where the actual
// flushing happens in ProcessDrain().
RefPtr<MediaDataEncoder::EncodePromise> AppleVTEncoder::Drain() {
  RefPtr<AppleVTEncoder> self = this;
  return InvokeAsync(mTaskQueue, __func__,
                     [self] { return self->ProcessDrain(); });
}

// Flushes every frame still inside the VideoToolbox session and resolves
// the drain promise with all encoded outputs accumulated so far. Runs on
// the task queue; requires a live session.
RefPtr<MediaDataEncoder::EncodePromise> AppleVTEncoder::ProcessDrain() {
  LOGV("::ProcessDrain");
  AssertOnTaskQueue();
  MOZ_ASSERT(mSession);

  // kCMTimeIndefinite asks the session to complete all pending frames.
  OSStatus status =
      VTCompressionSessionCompleteFrames(mSession, kCMTimeIndefinite);
  if (status != noErr) {
    LOGE("VTCompressionSessionCompleteFrames error");
    return EncodePromise::CreateAndReject(NS_ERROR_DOM_MEDIA_FATAL_ERR,
                                          __func__);
  }

  // Resolve the pending encode promise if any.
  MaybeResolveOrRejectEncodePromise();

  // VTCompressionSessionCompleteFrames() could have queued multiple tasks with
  // the new drained frames. Dispatch a task after them to resolve the promise
  // with those frames.
  RefPtr<AppleVTEncoder> self = this;
  return InvokeAsync(mTaskQueue, __func__, [self]() {
    EncodedData pendingFrames(std::move(self->mEncodedData));
    LOGV("Resolve drain promise with %zu encoded outputs",
         pendingFrames.Length());
    // NOTE(review): mEncodedData was just moved from, so this reset looks
    // redundant — confirm EncodedData's moved-from state before removing.
    self->mEncodedData = EncodedData();
    return EncodePromise::CreateAndResolve(std::move(pendingFrames), __func__);
  });
}

// Public shutdown entry point: hops to the task queue, where the session
// teardown happens in ProcessShutdown().
RefPtr<ShutdownPromise> AppleVTEncoder::Shutdown() {
  RefPtr<AppleVTEncoder> self = this;
  return InvokeAsync(mTaskQueue, __func__,
                     [self] { return self->ProcessShutdown(); });
}

// Tears down the compression session on the task queue and rejects any
// in-flight encode promise with NS_ERROR_DOM_MEDIA_CANCELED. Always
// resolves the returned shutdown promise with true.
RefPtr<ShutdownPromise> AppleVTEncoder::ProcessShutdown() {
  LOGD("::ProcessShutdown");
  AssertOnTaskQueue();
  if (mSession) {
    // Invalidate before CFRelease so no further callbacks fire.
    VTCompressionSessionInvalidate(mSession);
    CFRelease(mSession);
    mSession = nullptr;
    mInited = false;
  }

  // Set a cancellation error so the pending promise (if any) is rejected,
  // then clear it so the stale error does not leak into later operations.
  // The order of these three statements is intentional.
  mError = MediaResult(NS_ERROR_DOM_MEDIA_CANCELED, "Canceled in shutdown");
  MaybeResolveOrRejectEncodePromise();
  mError = NS_OK;

  return ShutdownPromise::CreateAndResolve(true, __func__);
}

RefPtr<GenericPromise> AppleVTEncoder::SetBitrate(uint32_t aBitsPerSec) {
  RefPtr<AppleVTEncoder> self = this;
  return InvokeAsync(mTaskQueue, __func__, [self, aBitsPerSec]() {
    MOZ_ASSERT(self->mSession);
    bool rv = SetBitrateAndMode(self->mSession, self->mConfig.mBitrateMode,
                                aBitsPerSec);
    return rv ? GenericPromise::CreateAndResolve(true, __func__)
              : GenericPromise::CreateAndReject(
                    NS_ERROR_DOM_MEDIA_NOT_SUPPORTED_ERR, __func__);
  });
}

// Settles the pending encode promise, if there is one: rejects with mError
// when an error is set, otherwise resolves with (and hands off) the
// accumulated mEncodedData. Also cancels the force-output timer, since an
// answer is now on its way. No-op when no promise is pending.
void AppleVTEncoder::MaybeResolveOrRejectEncodePromise() {
  AssertOnTaskQueue();

  if (mEncodePromise.IsEmpty()) {
    LOGV(
        "No pending promise to resolve(pending outputs: %zu) or reject(err: "
        "%s)",
        mEncodedData.Length(), mError.Description().get());
    return;
  }

  // The watchdog timer from ForceOutputIfNeeded() is no longer needed.
  if (mTimer) {
    mTimer->Cancel();
    mTimer = nullptr;
  }

  if (NS_FAILED(mError.Code())) {
    LOGE("Rejecting encode promise with error: %s", mError.Description().get());
    mEncodePromise.Reject(mError, __func__);
    return;
  }

  // Resolving moves mEncodedData out, leaving it empty for the next batch.
  LOGV("Resolving with %zu encoded outputs", mEncodedData.Length());
  mEncodePromise.Resolve(std::move(mEncodedData), __func__);
}

// Arms a one-shot 50 ms watchdog that resolves the pending encode promise
// (possibly with zero outputs) if the encoder produces nothing in time.
// Only needed on macOS < 11.0; on 11.0+ this returns immediately.
void AppleVTEncoder::ForceOutputIfNeeded() {
  if (__builtin_available(macos 11.0, *)) {
    return;
  }
  // Ideally, OutputFrame (called via FrameCallback) should resolve the encode
  // promise. However, sometimes output is produced only after multiple
  // inputs. To ensure continuous encoding, we force the encoder to produce a
  // potentially empty output if no result is received in 50 ms.
  RefPtr<AppleVTEncoder> self = this;
  auto r = NS_NewTimerWithCallback(
      [self](nsITimer* aTimer) {
        // The session may have been shut down before the timer fired.
        if (!self->mSession) {
          LOGV("Do nothing since the encoder has been shut down");
          return;
        }

        LOGV("Resolving the pending promise");
        self->MaybeResolveOrRejectEncodePromise();
      },
      TimeDuration::FromMilliseconds(50), nsITimer::TYPE_ONE_SHOT,
      "EncodingProgressChecker", mTaskQueue);
  if (r.isErr()) {
    // Timer creation failed; settle the promise immediately rather than
    // leaving the caller hanging.
    LOGE(
        "Failed to set an encoding progress checker. Resolve the pending "
        "promise now");
    MaybeResolveOrRejectEncodePromise();
    return;
  }
  // NOTE(review): any previously armed timer in mTimer is overwritten here
  // without Cancel() — presumably callers never re-arm while one is pending;
  // confirm against the call sites.
  mTimer = r.unwrap();
}

#undef LOGE
#undef LOGW
#undef LOGD
#undef LOGV

}  // namespace mozilla

Messung V0.5
C=94 H=98 G=95

¤ Dauer der Verarbeitung: 0.17 Sekunden  (vorverarbeitet)  ¤

*© Formatika GbR, Deutschland






Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereit gestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.