locked
AAC encoding example via MFT RSS feed

  • Question

  • Does anybody have an example of an AAC encoder MFT that works? I am trying to encode a PCM WAV file into AAC, but the resulting AAC file always plays back too "fast": all of the sound plays within a couple of seconds and then playback ends.

    This is my MFT, Pretty standard.

    // Constructs an encoder wrapper with no transform attached yet.
    //
    // The "Rs" members hold the PCM format that is advertised to the encoder
    // (44100 Hz, 16 bits per sample). The remaining input-format members are
    // filled in later by SetInputMediaType(); they are zeroed here so that
    // SetOutputMediaType() never reads an indeterminate channel count if it is
    // called first.
    AudioSampleEncoder :: AudioSampleEncoder()
    {
    	m_transform = NULL;
    	m_inRsAudioSamplesPerSecond = 44100;
    	m_inRsAudioBitsPerSample = 16;
    	m_inAudioSamplesPerSecond = 0;
    	m_inAudioBitsPerSample = 0;
    	m_inAudioNumofChannels = 0;
    }
    
    // Destructor: passes the address of m_transform to SafeRelease.
    // NOTE(review): SafeRelease is defined elsewhere; presumably it releases
    // the COM reference when the pointer is non-NULL and then nulls it - confirm.
    AudioSampleEncoder :: ~AudioSampleEncoder()
    {
    	SafeRelease(&m_transform);
    }
    
    // Logs a notice to stdout and then destroys this object.
    // The instance must not be touched after this call returns, because the
    // method deletes `this`.
    void AudioSampleEncoder :: Release()
    {
    	fputs("All audio resources will be released", stdout);
    	delete this;
    }
    
    // Finds an AAC audio encoder MFT and activates the first match into m_transform.
    //
    // Enumerates transforms in MFT_CATEGORY_AUDIO_ENCODER whose output type is
    // audio/AAC (no input-type filter is applied), then activates the first
    // entry of the returned list.
    //
    // Returns:
    //   S_OK                       - m_transform holds the activated encoder.
    //   MF_E_TOPO_CODEC_NOT_FOUND  - enumeration succeeded but returned no encoder.
    //   other failure HRESULT      - propagated from MFTEnumEx or ActivateObject.
    HRESULT AudioSampleEncoder :: InitializeMFTEncoder()
    {
    	HRESULT hr = S_OK;
    	UINT32 count = 0;
    	IMFActivate ** activate = NULL;
    	MFT_REGISTER_TYPE_INFO info = { 0 };
    
    	info.guidMajorType = MFMediaType_Audio;
    	info.guidSubtype = MFAudioFormat_AAC;
    	UINT32 flags = MFT_ENUM_FLAG_SYNCMFT | MFT_ENUM_FLAG_ASYNCMFT | MFT_ENUM_FLAG_LOCALMFT | MFT_ENUM_FLAG_TRANSCODE_ONLY | MFT_ENUM_FLAG_SORTANDFILTER;
    
    	// `info` is passed as the OUTPUT type filter; the input type is left unconstrained.
    	hr = MFTEnumEx(MFT_CATEGORY_AUDIO_ENCODER,
    				   flags,
    				   NULL,
    				   &info,
    				   &activate,
    				   &count);
    	if (FAILED(hr)) {
    		printf("Fail at MFTEnumEx. AAC transform encoder could not be initialized");
    		goto done;
    	}
    	printf("MFTEnumEx succeeded");
    
    	if (count == 0) {
    		printf("Fail at getting the AAC encoder. AAC transform encoder could not be initialized");
    		// An empty result is not an error for MFTEnumEx itself, so report it
    		// explicitly; otherwise the caller would see S_OK with a NULL transform.
    		hr = MF_E_TOPO_CODEC_NOT_FOUND;
    		goto done;
    	}
    
    	// Drop any transform left over from an earlier call so it is not leaked
    	// when ActivateObject overwrites the pointer.
    	SafeRelease(&m_transform);
    
    	hr = activate[0]->ActivateObject(IID_PPV_ARGS(&m_transform));
    	if (FAILED(hr)) {
    		printf("Fail at activating AAC encoder. AAC transform encoder could not be initialized");
    		goto done;
    	}
    	printf("AAC MFT encoder successfully initialized.");
    
    done:
    	// The caller owns both the activation objects and the array that holds them.
    	for (UINT32 idx = 0; idx < count; idx++) {
    		activate[idx]->Release();
    	}
    	CoTaskMemFree(activate);
    	return hr;
    }
    
    // Builds the AAC output media type and applies it to output stream 0 of the encoder.
    // For this MFT, set the input media type before output media type.
    //
    // The sample rate and bit depth come from the "Rs" members, and the channel
    // count from m_inAudioNumofChannels (written by SetInputMediaType).
    // The average byte rate is fixed at 12000.
    // NOTE(review): the AAC encoder documentation appears to list 12000, 16000,
    // 20000 and 24000 as the accepted byte rates - confirm before changing it.
    //
    // Returns:
    //   S_OK                   - the output type was accepted.
    //   MF_E_NOT_INITIALIZED   - no transform has been activated yet.
    //   other failure HRESULT  - from media-type creation, an attribute setter,
    //                            or SetOutputType.
    HRESULT AudioSampleEncoder :: SetOutputMediaType()
    {
    	HRESULT hr = S_OK;
    	IMFMediaType * outMediaType = NULL;
    
    	UINT32 outblockAlign = m_inAudioNumofChannels * (m_inRsAudioBitsPerSample / 8);
    
    	if (m_transform == NULL) {
    		printf("Fail at SetOutputMediaType. AAC encoder has not been initialized");
    		return MF_E_NOT_INITIALIZED;
    	}
    
    	// Every call assigns hr, so a failing setter is what the function returns
    	// rather than a stale S_OK.
    	CHECK_HR(hr = MFCreateMediaType(&outMediaType));
    	printf("Success at creating output media type");
    	CHECK_HR(hr = outMediaType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio));
    	printf("Success at MF_MT_MAJOR_TYPE");
    	CHECK_HR(hr = outMediaType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_AAC));
    	printf("Success at MF_MT_SUBTYPE");
    	CHECK_HR(hr = outMediaType->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, m_inRsAudioSamplesPerSecond));
    	printf("Success at MF_MT_AUDIO_SAMPLES_PER_SECOND");
    	CHECK_HR(hr = outMediaType->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, m_inRsAudioBitsPerSample));
    	printf("Success at MF_MT_AUDIO_BITS_PER_SAMPLE");
    	CHECK_HR(hr = outMediaType->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, m_inAudioNumofChannels));
    	printf("Success at MF_MT_AUDIO_NUM_CHANNELS");
    	CHECK_HR(hr = outMediaType->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, 12000));
    	printf("Success at MF_MT_AUDIO_AVG_BYTES_PER_SECOND");
    	CHECK_HR(hr = outMediaType->SetUINT32(MF_MT_AUDIO_BLOCK_ALIGNMENT, outblockAlign));
    	printf("Success at MF_MT_AUDIO_BLOCK_ALIGNMENT");
    
    	hr = m_transform->SetOutputType(0, outMediaType, 0);
    	if (FAILED(hr)) {
    		CL_DEBUG("Failed at SetOutputType");
    		goto done;
    	}
    	printf("Output media type successfully set for AAC encoder");
    
    done:
    	SafeRelease(&outMediaType);
    	return hr;
    }
    
    // Records the caller's audio format and applies a PCM input type to input
    // stream 0 of the encoder.
    // Set input type before setting the output type.
    //
    // The subtype, sample rate, bit depth and channel count of inMediaType are
    // read into the m_inAudio* members. The type handed to the encoder is PCM
    // and takes its sample rate and bit depth from the "Rs" members; only the
    // channel count is taken from inMediaType.
    // NOTE(review): this presumably assumes the audio has already been
    // resampled to the "Rs" format upstream - confirm against the caller.
    //
    // Parameters:
    //   inMediaType - media type describing the source audio; must not be NULL.
    //
    // Returns:
    //   S_OK                   - the input type was accepted.
    //   E_POINTER              - inMediaType is NULL.
    //   MF_E_NOT_INITIALIZED   - no transform has been activated yet.
    //   other failure HRESULT  - from an attribute getter/setter, media-type
    //                            creation, or SetInputType.
    HRESULT AudioSampleEncoder :: SetInputMediaType(IMFMediaType * inMediaType)
    {
    	HRESULT hr = S_OK;
    	IMFMediaType * inAudioType = NULL;
    	// Declared before the first CHECK_HR so that no jump to `done` can skip
    	// an initialization.
    	UINT32 inblockAlign = 0;
    	UINT32 inBytesPerSecond = 0;
    
    	if (inMediaType == NULL) {
    		printf("Fail at SetInputMediaType. Input media type is NULL");
    		return E_POINTER;
    	}
    	if (m_transform == NULL) {
    		printf("Fail at SetInputMediaType. AAC encoder has not been initialized");
    		return MF_E_NOT_INITIALIZED;
    	}
    
    	CHECK_HR(hr = inMediaType->GetGUID(MF_MT_SUBTYPE, &m_inAudioSubType));
    	printf("Successfully retrieved input subtype.");
    	CHECK_HR(hr = inMediaType->GetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, &m_inAudioSamplesPerSecond));
    	printf("Successfully retrieved input MF_MT_AUDIO_SAMPLES_PER_SECOND.");
    	CHECK_HR(hr = inMediaType->GetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, &m_inAudioBitsPerSample));
    	printf("Successfully retrieved input MF_MT_AUDIO_BITS_PER_SAMPLE.");
    	CHECK_HR(hr = inMediaType->GetUINT32(MF_MT_AUDIO_NUM_CHANNELS, &m_inAudioNumofChannels));
    	printf("Successfully retrieved input MF_MT_AUDIO_NUM_CHANNELS.");
    
    	inblockAlign = m_inAudioNumofChannels * (m_inRsAudioBitsPerSample / 8);
    	inBytesPerSecond = inblockAlign * m_inRsAudioSamplesPerSecond; // (block alignment) * (sample rate)
    
    	// Every call assigns hr, so a failing setter is what the function returns
    	// rather than a stale S_OK.
    	CHECK_HR(hr = MFCreateMediaType(&inAudioType));
    	printf("Success at creating input media type");
    	CHECK_HR(hr = inAudioType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio));
    	printf("Success at MF_MT_MAJOR_TYPE");
    	CHECK_HR(hr = inAudioType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_PCM));
    	printf("Success at MF_MT_SUBTYPE");
    	CHECK_HR(hr = inAudioType->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, m_inRsAudioSamplesPerSecond));
    	printf("Success at MF_MT_AUDIO_SAMPLES_PER_SECOND");
    	CHECK_HR(hr = inAudioType->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, m_inRsAudioBitsPerSample));
    	printf("Success at MF_MT_AUDIO_BITS_PER_SAMPLE");
    	CHECK_HR(hr = inAudioType->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, m_inAudioNumofChannels));
    	printf("Success at MF_MT_AUDIO_NUM_CHANNELS");
    	CHECK_HR(hr = inAudioType->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, inBytesPerSecond));
    	printf("Success at MF_MT_AUDIO_AVG_BYTES_PER_SECOND");
    	CHECK_HR(hr = inAudioType->SetUINT32(MF_MT_AUDIO_BLOCK_ALIGNMENT, inblockAlign));
    	printf("Success at MF_MT_AUDIO_BLOCK_ALIGNMENT");
    	CHECK_HR(hr = inAudioType->SetUINT32(MF_MT_ALL_SAMPLES_INDEPENDENT, 1));
    	printf("Success at MF_MT_ALL_SAMPLES_INDEPENDENT");
    
    	hr = m_transform->SetInputType(0, inAudioType, 0);
    	if (FAILED(hr)) {
    		printf("Fail at SetInputType for AAC encoder");
    		goto done;
    	}
    	printf("Successfully set input media type for AAC encoder");
    
    done:
    	SafeRelease(&inAudioType);
    	return hr;
    }
    
    // Feeds one PCM sample to the encoder and returns everything the encoder
    // produces for it.
    //
    // One input sample normally yields several output frames, so after
    // ProcessInput the transform is drained: ProcessOutput is called repeatedly
    // until it reports MF_E_TRANSFORM_NEED_MORE_INPUT. Each produced frame is
    // appended to the returned sample as its own media buffer, so frame
    // boundaries are preserved (buffer i == i-th frame produced by this call).
    // The returned sample's time is the time of the first frame and its
    // duration is the sum of the frame durations, when the encoder supplies them.
    //
    // The transform is deliberately NOT flushed afterwards: flushing discards
    // input the encoder has buffered but not yet encoded, which drops audio
    // between calls.
    //
    // Parameters:
    //   inSample  - PCM sample to encode; must not be NULL.
    //   outSample - receives a new sample owned by the caller (caller must
    //               Release it). It holds zero buffers when the encoder needs
    //               more input before it can emit a frame. Set to NULL on failure.
    //
    // Returns:
    //   S_OK                   - input consumed; *outSample is valid.
    //   E_POINTER              - inSample or outSample is NULL.
    //   MF_E_NOT_INITIALIZED   - no transform has been activated yet.
    //   other failure HRESULT  - from the transform or from sample/buffer creation.
    //
    // NOTE(review): output buffers are always allocated by this method; that
    // assumes the encoder does not set MFT_OUTPUT_STREAM_PROVIDES_SAMPLES.
    // NOTE(review): a consumer that expects exactly one AAC frame per IMFSample
    // must split the returned sample by buffer.
    HRESULT AudioSampleEncoder :: EncodeSample(IMFSample * inSample, IMFSample ** outSample)
    {
    	HRESULT hr = S_OK;
    	IMFSample * encodedSample = NULL;      // aggregate sample handed to the caller
    	IMFSample * frameSample = NULL;        // scratch sample for one ProcessOutput call
    	IMFMediaBuffer * frameBuffer = NULL;   // scratch buffer for one ProcessOutput call
    	DWORD processOutputStatus = 0;
    	MFT_OUTPUT_STREAM_INFO outStreamInfo = {0};
    	MFT_OUTPUT_DATA_BUFFER outputData = {0};
    	LONGLONG firstFrameTime = 0;
    	LONGLONG frameTime = 0;
    	LONGLONG frameDuration = 0;
    	LONGLONG totalDuration = 0;
    	UINT32 frameCount = 0;
    	bool haveTime = false;
    
    	if (outSample == NULL) {
    		return E_POINTER;
    	}
    	*outSample = NULL;
    	if (inSample == NULL) {
    		return E_POINTER;
    	}
    	if (m_transform == NULL) {
    		printf("Fail at EncodeSample. AAC encoder has not been initialized");
    		return MF_E_NOT_INITIALIZED;
    	}
    
    	hr = m_transform->GetOutputStreamInfo(0, &outStreamInfo);
    	if (FAILED(hr)) {
    		printf("Failed at GetOutputStreamInfo for output data");
    		goto done;
    	}
    
    	hr = MFCreateSample(&encodedSample);
    	if (FAILED(hr)) {
    		CL_DEBUG("Failed at CreateSample for output data");
    		goto done;
    	}
    
    	hr = m_transform->ProcessInput(0, inSample, 0);
    	if (FAILED(hr)) {
    		printf("Failed at ProcessInput");
    		goto done;
    	}
    
    	// Drain the encoder: one iteration per output frame.
    	for (;;) {
    		// The buffer's current length is left at 0; the transform sets it to
    		// the number of bytes it actually wrote.
    		hr = MFCreateMemoryBuffer(outStreamInfo.cbSize, &frameBuffer);
    		if (FAILED(hr)) {
    			printf("Failed at CreateBuffer for output data");
    			goto done;
    		}
    
    		hr = MFCreateSample(&frameSample);
    		if (FAILED(hr)) {
    			CL_DEBUG("Failed at CreateSample for output data");
    			goto done;
    		}
    
    		hr = frameSample->AddBuffer(frameBuffer);
    		if (FAILED(hr)) {
    			printf("Failed at AddBuffer for output data");
    			goto done;
    		}
    
    		// outputData borrows frameSample; the reference is owned by frameSample.
    		outputData.dwStreamID = 0;
    		outputData.pSample = frameSample;
    		outputData.dwStatus = 0;
    		outputData.pEvents = NULL;
    		processOutputStatus = 0;
    
    		hr = m_transform->ProcessOutput(0, 1, &outputData, &processOutputStatus);
    		// The caller must release any event collection the transform returns.
    		SafeRelease(&outputData.pEvents);
    
    		if (hr == MF_E_TRANSFORM_NEED_MORE_INPUT) {
    			// Fully drained: not an error, the encoder simply wants more PCM.
    			hr = S_OK;
    			break;
    		}
    		if (FAILED(hr)) {
    			printf("Fail at ProcessOutput");
    			goto done;
    		}
    
    		if (frameCount == 0 && SUCCEEDED(frameSample->GetSampleTime(&frameTime))) {
    			firstFrameTime = frameTime;
    			haveTime = true;
    		}
    		if (SUCCEEDED(frameSample->GetSampleDuration(&frameDuration))) {
    			totalDuration += frameDuration;
    		}
    
    		// Move this frame into the aggregate sample as its own buffer.
    		hr = encodedSample->AddBuffer(frameBuffer);
    		if (FAILED(hr)) {
    			printf("Failed at AddBuffer for output data");
    			goto done;
    		}
    		frameCount++;
    
    		SafeRelease(&frameBuffer);
    		SafeRelease(&frameSample);
    	}
    
    	if (haveTime) {
    		hr = encodedSample->SetSampleTime(firstFrameTime);
    		if (FAILED(hr)) {
    			printf("Failed at SetSampleTime for output data");
    			goto done;
    		}
    	}
    	if (frameCount > 0) {
    		hr = encodedSample->SetSampleDuration(totalDuration);
    		if (FAILED(hr)) {
    			printf("Failed at SetSampleDuration for output data");
    			goto done;
    		}
    	}
    
    	// Transfer ownership of the aggregate sample to the caller.
    	*outSample = encodedSample;
    	encodedSample = NULL;
    
    done:
    	SafeRelease(&frameBuffer);
    	SafeRelease(&frameSample);
    	SafeRelease(&encodedSample);
    	return hr;
    }

    Friday, July 11, 2014 11:33 AM

Answers

  • OK, I got it.

    The way to do it is to call ProcessInput, then in a loop, keep calling ProcessOutput until

    hr = MF_E_TRANSFORM_NEED_MORE_INPUT.

    And it works.

    • Marked as answer by Kadambi Wednesday, July 16, 2014 11:27 AM
    Wednesday, July 16, 2014 11:27 AM

All replies

  • As a test, I initialized a writer and wrote the same sample (where the sink writer transcodes from pcm to AAC). It worked fine.

    This encoder is not working properly.

    What am I doing wrong?

    Monday, July 14, 2014 4:25 AM
  • If I do a MFTrace for ProcessInput and ProcessOutput, this is what I see:

    20500,1878 08:30:41.59382 CMFTransformDetours::ProcessInput @00828F5C Stream ID 0, Sample @0046C150, Time 800ms, Duration 100ms, Buffers 1, Size 8820B, MFSampleExtension_CleanPoint=1;MFSampleExtension_Token=@00000000
    20500,1878 08:30:41.59490 CMFTransformDetours::ProcessOutput @00828F5C Stream ID 0, Sample @0046C0D8, Time 800ms, Duration 23ms, Buffers 1, Size 248B, MFSampleExtension_Discontinuity=1


    I see MFSampleExtension_Discontinuity=1 being set on every ProcessOutput. Could this be the reason? Why is it set?

    Edit: Setting MFSampleExtension_Discontinuity=0 explicitly didn't help.

    • Edited by Kadambi Monday, July 14, 2014 11:27 AM
    Monday, July 14, 2014 8:43 AM
  • This is what the AAC encoder doc says: "Each output sample contains one compressed AAC frame corresponding to 1024 PCM samples. For example, at 48 Khz sampling rate, the duration of one compressed frame is 21.33 msec." http://msdn.microsoft.com/en-us/library/windows/desktop/dd742785(v=vs.85).aspx My input sample has a buffer of 8820 bytes. At 2 bytes per PCM sample, that is 4410 PCM samples. So, do I have to split it to 1024 samples for ProcessInput call?
    Tuesday, July 15, 2014 5:53 AM
  • Really, can anybody clue me in? Do I have to send 1024 samples to ProcessInput at a time?
    Tuesday, July 15, 2014 10:15 AM
  • So, I tried with sending just 1024 PCM samples. No dice.

    What would it take to get a response from documentation team?

    Wednesday, July 16, 2014 4:53 AM
  • OK, I got it.

    The way to do it is to call ProcessInput, then in a loop, keep calling ProcessOutput until

    hr = MF_E_TRANSFORM_NEED_MORE_INPUT.

    And it works.

    • Marked as answer by Kadambi Wednesday, July 16, 2014 11:27 AM
    Wednesday, July 16, 2014 11:27 AM