Quelle WMFAudioMFTManager.cpp

Sprache: C++

/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim:set ts=2 sw=2 sts=2 et cindent: */
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "WMFAudioMFTManager.h"
#include "MediaInfo.h"
#include "TimeUnits.h"
#include "VideoUtils.h"
#include "WMFUtils.h"
#include "mozilla/AbstractThread.h"
#include "mozilla/Logging.h"
#include "mozilla/Telemetry.h"
#include "nsTArray.h"
#include "BufferReader.h"
#include "mozilla/ScopeExit.h"

// File-local logging helper: forwards its printf-style arguments to MOZ_LOG on
// sPDMLog at Debug level. Undefined again at the end of this file.
#define LOG(...) MOZ_LOG(sPDMLog, mozilla::LogLevel::Debug, (__VA_ARGS__))

namespace mozilla {

using media::TimeUnit;

// Captures the channel count, sample rate and stream type from aConfig.
//
// For AAC streams it additionally:
//  - locates the codec specific configuration bytes,
//  - records the encoder delay and total media frame count when the config
//    carries AacCodecSpecificData,
//  - decides whether the stream is treated as ADTS (no config bytes) or as raw
//    AAC packets (config bytes present),
//  - fills mUserData via AACAudioSpecificConfigToUserData.
WMFAudioMFTManager::WMFAudioMFTManager(const AudioInfo& aConfig)
    : mAudioChannels(aConfig.mChannels),
      mChannelsMap(AudioConfig::ChannelLayout::UNKNOWN_MAP),
      mAudioRate(aConfig.mRate),
      mStreamType(GetStreamTypeFromMimeType(aConfig.mMimeType)) {
  MOZ_COUNT_CTOR(WMFAudioMFTManager);

  if (mStreamType != WMFStreamType::AAC) {
    return;
  }

  const uint8_t* audioSpecConfig = nullptr;
  uint32_t configLength = 0;
  // Owner of the fallback blob. Declared at this scope so that the buffer
  // audioSpecConfig may point into is guaranteed to stay alive until
  // AACAudioSpecificConfigToUserData() below has consumed it.
  RefPtr<MediaByteBuffer> fallbackBlob;

  if (aConfig.mCodecSpecificConfig.is<AacCodecSpecificData>()) {
    const AacCodecSpecificData& aacCodecSpecificData =
        aConfig.mCodecSpecificConfig.as<AacCodecSpecificData>();
    audioSpecConfig =
        aacCodecSpecificData.mDecoderConfigDescriptorBinaryBlob->Elements();
    configLength =
        aacCodecSpecificData.mDecoderConfigDescriptorBinaryBlob->Length();

    mRemainingEncoderDelay = mEncoderDelay =
        aacCodecSpecificData.mEncoderDelayFrames;
    mTotalMediaFrames = aacCodecSpecificData.mMediaFrameCount;
    LOG("AudioMFT decoder: Found AAC decoder delay (%" PRIu32
        " frames) and total media frames (%" PRIu64 " frames)\n",
        mEncoderDelay, mTotalMediaFrames);
  } else {
    // Gracefully handle failure to cover all codec specific cases above. Once
    // we're confident there is no fall through from these cases above, we
    // should remove this code.
    fallbackBlob = GetAudioCodecSpecificBlob(aConfig.mCodecSpecificConfig);
    audioSpecConfig = fallbackBlob->Elements();
    configLength = fallbackBlob->Length();
  }

  // If no extradata has been provided, assume this is ADTS. Otherwise,
  // assume raw AAC packets.
  mIsADTS = !configLength;
  AACAudioSpecificConfigToUserData(aConfig.mExtendedProfile, audioSpecConfig,
                                   configLength, mUserData);
}

// Destructor: only balances the MOZ_COUNT_CTOR issued in the constructor.
WMFAudioMFTManager::~WMFAudioMFTManager() {
  MOZ_COUNT_DTOR(WMFAudioMFTManager);
}

// Maps the configured stream type to the Media Foundation audio subtype GUID
// used for the decoder's input type. Anything other than AAC or MP3 yields
// GUID_NULL.
const GUID& WMFAudioMFTManager::GetMediaSubtypeGUID() {
  MOZ_ASSERT(StreamTypeIsAudio(mStreamType));
  if (mStreamType == WMFStreamType::AAC) {
    return MFAudioFormat_AAC;
  }
  if (mStreamType == WMFStreamType::MP3) {
    return MFAudioFormat_MP3;
  }
  return GUID_NULL;
}

bool WMFAudioMFTManager::Init() {
  NS_ENSURE_TRUE(StreamTypeIsAudio(mStreamType), false);

  RefPtr<MFTDecoder> decoder(new MFTDecoder());
  // Note: MP3 MFT isn't registered as supporting Float output, but it works.
  // Find PCM output MFTs as this is the common type.
  HRESULT hr = WMFDecoderModule::CreateMFTDecoder(mStreamType, decoder);
  NS_ENSURE_TRUE(SUCCEEDED(hr), false);

  // Setup input/output media types
  RefPtr<IMFMediaType> inputType;

  hr = wmf::MFCreateMediaType(getter_AddRefs(inputType));
  NS_ENSURE_TRUE(SUCCEEDED(hr), false);

  hr = inputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
  NS_ENSURE_TRUE(SUCCEEDED(hr), false);

  hr = inputType->SetGUID(MF_MT_SUBTYPE, GetMediaSubtypeGUID());
  NS_ENSURE_TRUE(SUCCEEDED(hr), false);

  hr = inputType->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, mAudioRate);
  NS_ENSURE_TRUE(SUCCEEDED(hr), false);

  hr = inputType->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, mAudioChannels);
  NS_ENSURE_TRUE(SUCCEEDED(hr), false);

  if (mStreamType == WMFStreamType::AAC) {
    UINT32 payloadType = mIsADTS ? 1 : 0;
    hr = inputType->SetUINT32(MF_MT_AAC_PAYLOAD_TYPE, payloadType);
    NS_ENSURE_TRUE(SUCCEEDED(hr), false);

    hr = inputType->SetBlob(MF_MT_USER_DATA, mUserData.Elements(),
                            mUserData.Length());
    NS_ENSURE_TRUE(SUCCEEDED(hr), false);
  }

  RefPtr<IMFMediaType> outputType;
  hr = wmf::MFCreateMediaType(getter_AddRefs(outputType));
  NS_ENSURE_TRUE(SUCCEEDED(hr), false);

  hr = outputType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
  NS_ENSURE_TRUE(SUCCEEDED(hr), false);

  hr = outputType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_Float);
  NS_ENSURE_TRUE(SUCCEEDED(hr), false);

  hr = outputType->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, 32);
  NS_ENSURE_TRUE(SUCCEEDED(hr), false);

  hr = decoder->SetMediaTypes(inputType, outputType);
  NS_ENSURE_TRUE(SUCCEEDED(hr), false);

  mDecoder = decoder;

  return true;
}

// Feeds one compressed sample to the decoder, passing its timestamp and
// duration in microseconds, and remembers the sample's time in
// mLastInputTime. Returns whatever HRESULT the decoder reports.
HRESULT
WMFAudioMFTManager::Input(MediaRawData* aSample) {
  mLastInputTime = aSample->mTime;

  const auto* payload = aSample->Data();
  const auto payloadSize = uint32_t(aSample->Size());
  const auto timestampUs = aSample->mTime.ToMicroseconds();
  const auto durationUs = aSample->mDuration.ToMicroseconds();

  return mDecoder->Input(payload, payloadSize, timestampUs, durationUs);
}

// Returns a short lowercase codec label for the configured stream type:
// "aac", "mp3", or "unknown" for anything else.
nsCString WMFAudioMFTManager::GetCodecName() const {
  switch (mStreamType) {
    case WMFStreamType::AAC:
      return "aac"_ns;
    case WMFStreamType::MP3:
      return "mp3"_ns;
    default:
      return "unknown"_ns;
  }
}

// Re-reads the sample rate, channel count and channel mask from the decoder's
// current output media type into mAudioRate, mAudioChannels and mChannelsMap.
//
// A missing rate or channel count is an error and its HRESULT is returned. A
// missing channel mask is tolerated: the layout falls back to UNKNOWN_MAP and
// S_OK is still returned.
HRESULT
WMFAudioMFTManager::UpdateOutputType() {
  RefPtr<IMFMediaType> outputType;
  HRESULT hr = mDecoder->GetOutputMediaType(outputType);
  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);

  hr = outputType->GetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, &mAudioRate);
  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);

  hr = outputType->GetUINT32(MF_MT_AUDIO_NUM_CHANNELS, &mAudioChannels);
  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);

  uint32_t reportedMask;
  if (FAILED(outputType->GetUINT32(MF_MT_AUDIO_CHANNEL_MASK, &reportedMask))) {
    LOG("Unable to retrieve channel layout. Ignoring");
    mChannelsMap = AudioConfig::ChannelLayout::UNKNOWN_MAP;
  } else {
    mChannelsMap = reportedMask;
  }

  return S_OK;
}

// Pulls one decoded sample from the MFT and wraps it in an AudioData.
//
// aStreamOffset is forwarded to the AudioData constructor. aOutput is cleared
// on entry and is only set when a sample was actually produced.
//
// Returns:
//  - S_OK with aOutput set when a sample was decoded;
//  - S_OK with aOutput left null when the decoder's buffer did not contain a
//    single complete frame;
//  - MF_E_TRANSFORM_NEED_MORE_INPUT when the decoder asks for more input, or
//    when the output looks like a partial AAC frame (see IsPartialOutput);
//  - a failure HRESULT for every other error.
HRESULT
WMFAudioMFTManager::Output(int64_t aStreamOffset, RefPtr<MediaData>& aOutput) {
  aOutput = nullptr;
  RefPtr<IMFSample> sample;
  HRESULT hr;
  int typeChangeCount = 0;
  // Remembered so that a rate change caused by this call can be detected.
  const auto oldAudioRate = mAudioRate;
  while (true) {
    hr = mDecoder->Output(&sample);
    if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT) {
      return hr;
    }
    if (hr == MF_E_TRANSFORM_STREAM_CHANGE) {
      hr = mDecoder->FindDecoderOutputType();
      NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
      hr = UpdateOutputType();
      NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
      // Catch infinite loops, but some decoders perform at least 2 stream
      // changes on consecutive calls, so be permissive.
      // 100 is arbitrarily > 2.
      NS_ENSURE_TRUE(typeChangeCount < 100, MF_E_TRANSFORM_STREAM_CHANGE);
      ++typeChangeCount;
      continue;
    }
    break;
  }

  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);

  if (!sample) {
    LOG("Audio MFTDecoder returned success but null output.");
    return E_FAIL;
  }

  UINT32 discontinuity = false;
  sample->GetUINT32(MFSampleExtension_Discontinuity, &discontinuity);
  if (mFirstFrame || discontinuity) {
    // Update the output type, in case this segment has a different
    // rate. This also triggers on the first sample, which can have a
    // different rate than is advertised in the container, and sometimes we
    // don't get a MF_E_TRANSFORM_STREAM_CHANGE when the rate changes.
    hr = UpdateOutputType();
    NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
    mFirstFrame = false;
  }

  LONGLONG hns;
  hr = sample->GetSampleTime(&hns);
  if (FAILED(hr)) {
    return E_FAIL;
  }
  TimeUnit pts = TimeUnit::FromHns(hns, mAudioRate);
  NS_ENSURE_TRUE(pts.IsValid(), E_FAIL);

  RefPtr<IMFMediaBuffer> buffer;
  hr = sample->ConvertToContiguousBuffer(getter_AddRefs(buffer));
  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);

  BYTE* data = nullptr;  // Note: *data will be owned by the IMFMediaBuffer, we
                         // don't need to free it.
  DWORD maxLength = 0, currentLength = 0;
  hr = buffer->Lock(&data, &maxLength, &currentLength);
  NS_ENSURE_TRUE(SUCCEEDED(hr), hr);
  // Only register the unlock once the lock is known to have succeeded, so
  // that Lock/Unlock calls stay paired.
  ScopeExit exit([buffer] { buffer->Unlock(); });

  // The frame count below divides by the channel count, which comes from the
  // decoder's output type; refuse to continue rather than divide by zero.
  if (mAudioChannels == 0) {
    LOG("Audio MFTDecoder reported zero channels.");
    return E_FAIL;
  }

  // Output is made of floats.
  uint32_t numSamples = currentLength / sizeof(float);
  uint32_t numFrames = numSamples / mAudioChannels;
  if (numFrames == 0) {
    // The buffer does not hold a single complete frame. Report success with
    // aOutput left null; there is nothing to hand out for this sample.
    return S_OK;
  }

  if (oldAudioRate != mAudioRate) {
    LOG("Audio rate changed from %" PRIu32 " to %" PRIu32, oldAudioRate,
        mAudioRate);
  }

  AlignedAudioBuffer audioData(numSamples);
  if (!audioData) {
    return E_OUTOFMEMORY;
  }

  float* floatData = reinterpret_cast<float*>(data);
  PodCopy(audioData.Data(), floatData, numSamples);

  TimeUnit duration(numFrames, mAudioRate);
  NS_ENSURE_TRUE(duration.IsValid(), E_FAIL);

  const bool isAudioRateChangedToHigher = oldAudioRate < mAudioRate;
  if (IsPartialOutput(duration, isAudioRateChangedToHigher)) {
    LOG("Encounter a partial frame?! duration shrinks from %s to %s",
        mLastOutputDuration.ToString().get(), duration.ToString().get());
    return MF_E_TRANSFORM_NEED_MORE_INPUT;
  }

  aOutput = new AudioData(aStreamOffset, pts, std::move(audioData),
                          mAudioChannels, mAudioRate, mChannelsMap);
  MOZ_DIAGNOSTIC_ASSERT(duration == aOutput->mDuration, "must be equal");
  mLastOutputDuration = aOutput->mDuration;

#ifdef LOG_SAMPLE_DECODE
  LOG("Decoded audio sample! timestamp=%lld duration=%lld currentLength=%u",
      pts.ToMicroseconds(), duration.ToMicroseconds(), currentLength);
#endif

  return S_OK;
}

// Heuristic for spotting a bogus partial output sample from the AAC MFT.
//
// This issue was found in Windows11, where AAC MFT decoder would incorrectly
// output partial output samples to us, even if MS's documentation said it
// won't happen [1]. More details are described in bug 1731430 comment 26.
//
// A switch to a higher audio rate legitimately shortens the duration. So a new
// duration shorter than the previous one, without such a rate increase, is
// treated as a likely partial output. Non-AAC streams are never flagged.
//
// [1]
// https://docs.microsoft.com/en-us/windows/win32/medfound/mft-message-command-drain
bool WMFAudioMFTManager::IsPartialOutput(
    const media::TimeUnit& aNewOutputDuration,
    const bool aIsRateChangedToHigher) const {
  return mStreamType == WMFStreamType::AAC &&
         mLastOutputDuration > aNewOutputDuration && !aIsRateChangedToHigher;
}

// Releases this manager's reference to the decoder.
void WMFAudioMFTManager::Shutdown() {
  mDecoder = nullptr;
}

}  // namespace mozilla

#undef LOG

Messung V0.5 in Prozent

¤ Dauer der Verarbeitung: 0.16 Sekunden (vorverarbeitet am 2026-05-03) ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit noch Qualität der bereitgestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.