I am writing an audio transcoding application using the ffmpeg libraries.
Here is my code:
```cpp
/*
 * File: main.cpp
 * Author: vinod
 * Compile with "g++ -std=c++11 -o audiotranscode main.cpp -lavformat -lavcodec -lavutil -lavfilter"
 *
 */

#if !defined PRId64 || PRI_MACROS_BROKEN
#undef PRId64
#define PRId64 "lld"
#endif
#define __STDC_FORMAT_MACROS

#ifdef __cplusplus
extern "C" {
#endif
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <stdint.h>
#include <libavutil/imgutils.h>
#include <libavutil/samplefmt.h>
#include <libavutil/frame.h>
#include <libavutil/timestamp.h>
#include <libavformat/avformat.h>
#include <libavfilter/avfilter.h>
#include <libavfilter/buffersrc.h>
#include <libavfilter/buffersink.h>
#include <libswscale/swscale.h>
#include <libavutil/opt.h>
#ifdef __cplusplus
}
#endif

#include <iostream>
using namespace std;

int select_stream, got_frame, got_packet;
AVFormatContext *in_fmt_ctx = NULL, *out_fmt_ctx = NULL;
AVCodec *dec_codec = NULL, *enc_codec = NULL;
AVStream *audio_st = NULL;
AVCodecContext *enc_ctx = NULL, *dec_ctx = NULL;
AVFrame *pFrame = NULL, *pFrameFiltered = NULL;
AVFilterGraph *filter_graph = NULL;
AVFilterContext *buffersrc_ctx = NULL;
AVFilterContext *buffersink_ctx = NULL;
AVPacket packet;

string inFileName = "/home/vinod/vinod/Media/univac.webm";
string outFileName = "audio_extracted.m4a";
int target_bit_rate = 128000, sample_rate = 22050, channels = 1;
AVSampleFormat sample_fmt = AV_SAMPLE_FMT_S16;
string filter_description = "aresample=22050,aformat=sample_fmts=s16:channel_layouts=mono";
int log_averror(int errcode)
{
    char *errbuf = (char *) calloc(AV_ERROR_MAX_STRING_SIZE, sizeof(char));
    av_strerror(errcode, errbuf, AV_ERROR_MAX_STRING_SIZE);
    std::cout << "Error - " << errbuf << std::endl;
    free(errbuf);
    return -1;
}

/**
 * Initialize conversion filter */
int initialize_audio_filter()
{
    char args[512];
    int ret;
    AVFilter *buffersrc = avfilter_get_by_name("abuffer");
    AVFilter *buffersink = avfilter_get_by_name("abuffersink");
    AVFilterInOut *outputs = avfilter_inout_alloc();
    AVFilterInOut *inputs = avfilter_inout_alloc();
    filter_graph = avfilter_graph_alloc();
    const enum AVSampleFormat out_sample_fmts[] = {sample_fmt, AV_SAMPLE_FMT_NONE};
    const int64_t out_channel_layouts[] = {av_get_default_channel_layout(out_fmt_ctx->streams[0]->codec->channels), -1};
    const int out_sample_rates[] = {out_fmt_ctx->streams[0]->codec->sample_rate, -1};

    if (!dec_ctx->channel_layout)
        dec_ctx->channel_layout = av_get_default_channel_layout(dec_ctx->channels);

    snprintf(args, sizeof(args),
             "time_base=%d/%d:sample_rate=%d:sample_fmt=%s:channel_layout=0x%" PRIx64,
             in_fmt_ctx->streams[select_stream]->time_base.num,
             in_fmt_ctx->streams[select_stream]->time_base.den,
             dec_ctx->sample_rate,
             av_get_sample_fmt_name(dec_ctx->sample_fmt),
             dec_ctx->channel_layout);

    ret = avfilter_graph_create_filter(&buffersrc_ctx, buffersrc, "in", args, NULL, filter_graph);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot create buffer source\n");
        return -1;
    }

    ret = avfilter_graph_create_filter(&buffersink_ctx, buffersink, "out", NULL, NULL, filter_graph);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot create buffer sink\n");
        return ret;
    }

    ret = av_opt_set_int_list(buffersink_ctx, "sample_fmts", out_sample_fmts, -1, AV_OPT_SEARCH_CHILDREN);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot set output sample format\n");
        return ret;
    }

    ret = av_opt_set_int_list(buffersink_ctx, "channel_layouts", out_channel_layouts, -1, AV_OPT_SEARCH_CHILDREN);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot set output channel layout\n");
        return ret;
    }

    ret = av_opt_set_int_list(buffersink_ctx, "sample_rates", out_sample_rates, -1, AV_OPT_SEARCH_CHILDREN);
    if (ret < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot set output sample rate\n");
        return ret;
    }

    /* Endpoints for the filter graph. */
    outputs->name = av_strdup("in");
    outputs->filter_ctx = buffersrc_ctx;
    outputs->pad_idx = 0;
    outputs->next = NULL;

    inputs->name = av_strdup("out");
    inputs->filter_ctx = buffersink_ctx;
    inputs->pad_idx = 0;
    inputs->next = NULL;

    string filter_desc = filter_description;

    if ((ret = avfilter_graph_parse_ptr(filter_graph, filter_desc.c_str(), &inputs, &outputs, NULL)) < 0) {
        log_averror(ret);
        exit(1);
    }

    if ((ret = avfilter_graph_config(filter_graph, NULL)) < 0) {
        log_averror(ret);
        exit(1);
    }

    /* Print summary of the sink buffer
     * Note: args buffer is reused to store channel layout string */
    AVFilterLink *outlink = buffersink_ctx->inputs[0];
    av_get_channel_layout_string(args, sizeof(args), -1, outlink->channel_layout);
    av_log(NULL, AV_LOG_INFO, "Output: srate:%dHz fmt:%s chlayout:%s\n",
           (int) outlink->sample_rate,
           (char *) av_x_if_null(av_get_sample_fmt_name((AVSampleFormat) outlink->format), "?"),
           args);

    return 0;
}
/*
 *
 */
int main(int argc, char **argv)
{
    int ret;
    cout << "Hello World" << endl;
    printf("abcd");

    avcodec_register_all();
    av_register_all();
    avfilter_register_all();

    /* open input file, and allocate format context */
    if (avformat_open_input(&in_fmt_ctx, inFileName.c_str(), NULL, NULL) < 0) {
        std::cout << "error opening input file - " << inFileName << std::endl;
        return -1;
    }

    /* retrieve stream information */
    if (avformat_find_stream_info(in_fmt_ctx, NULL) < 0) {
        std::cerr << "Could not find stream information in the input file " << inFileName << std::endl;
    }

    /* Dump format details */
    printf("\n ---------------------------------------------------------------------- \n");
    av_dump_format(in_fmt_ctx, 0, inFileName.c_str(), 0);
    printf("\n ---------------------------------------------------------------------- \n");

    /* Choose an audio stream */
    select_stream = av_find_best_stream(in_fmt_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, &dec_codec, 0);
    if (select_stream == AVERROR_STREAM_NOT_FOUND) {
        std::cerr << "No audio stream found" << std::endl;
        return -1;
    }
    if (select_stream == AVERROR_DECODER_NOT_FOUND) {
        std::cerr << "No suitable decoder found" << std::endl;
        return -1;
    }

    dec_ctx = in_fmt_ctx->streams[select_stream]->codec;
    av_opt_set_int(dec_ctx, "refcounted_frames", 1, 0);

    /* init the audio decoder */
    if ((ret = avcodec_open2(dec_ctx, dec_codec, NULL)) < 0) {
        av_log(NULL, AV_LOG_ERROR, "Cannot open audio decoder\n");
        return ret;
    }

    /* allocate output context */
    ret = avformat_alloc_output_context2(&out_fmt_ctx, NULL, NULL, outFileName.c_str());
    if (ret < 0) {
        std::cerr << "Could not create output context for the file " << outFileName << std::endl;
        return -1;
    }

    /* find the encoder */
    enum AVCodecID codec_id = out_fmt_ctx->oformat->audio_codec;
    enc_codec = avcodec_find_encoder(codec_id);
    if (!(enc_codec)) {
        std::cerr << "Could not find encoder for - " << avcodec_get_name(codec_id) << std::endl;
        return -1;
    }

    /* add a new stream */
    audio_st = avformat_new_stream(out_fmt_ctx, enc_codec);
    if (!audio_st) {
        std::cerr << "Could not add audio stream" << std::endl;
    }

    /* Initialise audio codec */
    audio_st->id = out_fmt_ctx->nb_streams - 1;
    enc_ctx = audio_st->codec;
    enc_ctx->codec_id = codec_id;
    enc_ctx->codec_type = AVMEDIA_TYPE_AUDIO;
    enc_ctx->bit_rate = target_bit_rate;
    enc_ctx->sample_rate = sample_rate;
    enc_ctx->sample_fmt = sample_fmt;
    enc_ctx->channels = channels;
    enc_ctx->channel_layout = av_get_default_channel_layout(enc_ctx->channels);

    /* Some formats want stream headers to be separate. */
    if (out_fmt_ctx->oformat->flags & AVFMT_GLOBALHEADER) {
        enc_ctx->flags |= CODEC_FLAG_GLOBAL_HEADER;
    }

    ret = avcodec_open2(out_fmt_ctx->streams[0]->codec, enc_codec, NULL);
    if (ret < 0) {
        std::cerr << "Could not create codec context for the file " << outFileName << std::endl;
        return -1;
    }

    /* Initialize filter */
    initialize_audio_filter();

    if (!(out_fmt_ctx->oformat->flags & AVFMT_NOFILE)) {
        int ret = avio_open(&out_fmt_ctx->pb, outFileName.c_str(), AVIO_FLAG_WRITE);
        if (ret < 0) {
            log_averror(ret);
            return -1;
        }
    }

    /* Write header */
    ret = avformat_write_header(out_fmt_ctx, NULL);
    if (ret < 0) {
        log_averror(ret);
        return -1;
    }

    /* Allocate frames */
    pFrame = av_frame_alloc();
    if (!pFrame) {
        std::cerr << "Could not allocate frame\n";
        return -1;
    }
    pFrameFiltered = av_frame_alloc();
    if (!pFrameFiltered) {
        std::cerr << "Could not allocate frame\n";
        return -1;
    }

    av_init_packet(&packet);
    packet.data = NULL;
    packet.size = 0;
    /* Read packets from the stream */
    while (av_read_frame(in_fmt_ctx, &packet) >= 0) {
        if (packet.stream_index == select_stream) {
            avcodec_get_frame_defaults(pFrame);
            ret = avcodec_decode_audio4(dec_ctx, pFrame, &got_frame, &packet);
            if (ret < 0) {
                log_averror(ret);
                return ret;
            }

            printf("Decoded packet pts : %ld ", packet.pts);
            printf("Frame Best Effort pts : %ld \n", pFrame->best_effort_timestamp);

            /* Set frame pts */
            pFrame->pts = av_frame_get_best_effort_timestamp(pFrame);

            if (got_frame) {
                /* push the decoded frame into the filtergraph */
                ret = av_buffersrc_add_frame_flags(buffersrc_ctx, pFrame, AV_BUFFERSRC_FLAG_KEEP_REF);
                if (ret < 0) {
                    log_averror(ret);
                    return ret;
                }

                /* pull filtered frames from the filtergraph */
                while (1) {
                    ret = av_buffersink_get_frame(buffersink_ctx, pFrameFiltered);
                    if ((ret == AVERROR(EAGAIN)) || (ret == AVERROR_EOF)) {
                        break;
                    }
                    if (ret < 0) {
                        printf("Error while getting filtered frames from filtergraph\n");
                        log_averror(ret);
                        return -1;
                    }

                    /* Initialize the packet */
                    AVPacket encodedPacket = {0};
                    av_init_packet(&encodedPacket);

                    ret = avcodec_encode_audio2(out_fmt_ctx->streams[0]->codec, &encodedPacket, pFrameFiltered, &got_packet);
                    if (!ret && got_packet && encodedPacket.size) {
                        /* Set correct pts and dts */
                        if (encodedPacket.pts != AV_NOPTS_VALUE) {
                            encodedPacket.pts = av_rescale_q(encodedPacket.pts, buffersink_ctx->inputs[0]->time_base,
                                                             out_fmt_ctx->streams[0]->time_base);
                        }
                        if (encodedPacket.dts != AV_NOPTS_VALUE) {
                            encodedPacket.dts = av_rescale_q(encodedPacket.dts, buffersink_ctx->inputs[0]->time_base,
                                                             out_fmt_ctx->streams[0]->time_base);
                        }

                        printf("Encoded packet pts %ld\n", encodedPacket.pts);

                        /* Write the compressed frame to the media file. */
                        ret = av_interleaved_write_frame(out_fmt_ctx, &encodedPacket);
                        if (ret < 0) {
                            log_averror(ret);
                            return -1;
                        }
                    } else if (ret < 0) {
                        log_averror(ret);
                        return -1;
                    }

                    av_frame_unref(pFrameFiltered);
                }

                av_frame_unref(pFrame);
            }
        }
    }
    /* Flush delayed frames from the encoder */
    got_packet = 1;
    while (got_packet) {
        AVPacket encodedPacket = {0};
        av_init_packet(&encodedPacket);

        ret = avcodec_encode_audio2(out_fmt_ctx->streams[0]->codec, &encodedPacket, NULL, &got_packet);
        if (!ret && got_packet && encodedPacket.size) {
            /* Set correct pts and dts */
            if (encodedPacket.pts != AV_NOPTS_VALUE) {
                encodedPacket.pts = av_rescale_q(encodedPacket.pts, buffersink_ctx->inputs[0]->time_base,
                                                 out_fmt_ctx->streams[0]->time_base);
            }
            if (encodedPacket.dts != AV_NOPTS_VALUE) {
                encodedPacket.dts = av_rescale_q(encodedPacket.dts, buffersink_ctx->inputs[0]->time_base,
                                                 out_fmt_ctx->streams[0]->time_base);
            }

            printf("Encoded packet pts %ld\n", encodedPacket.pts);

            /* Write the compressed frame to the media file. */
            ret = av_interleaved_write_frame(out_fmt_ctx, &encodedPacket);
            if (ret < 0) {
                log_averror(ret);
                return -1;
            }
        } else if (ret < 0) {
            log_averror(ret);
            return -1;
        }
    }

    /* Write trailer */
    av_write_trailer(out_fmt_ctx);

    avfilter_graph_free(&filter_graph);
    if (dec_ctx)
        avcodec_close(dec_ctx);
    avformat_close_input(&in_fmt_ctx);
    av_frame_free(&pFrame);
    av_frame_free(&pFrameFiltered);

    if (!(out_fmt_ctx->oformat->flags & AVFMT_NOFILE))
        avio_close(out_fmt_ctx->pb);
    avcodec_close(out_fmt_ctx->streams[0]->codec);
    avformat_free_context(out_fmt_ctx);

    return 0;
}
```
The transcoded audio file has the same duration as the input, but it is completely noisy. Can anyone tell me what I am doing wrong here?
Solution
I have found the problem and solved it.
When the output file is opened in Audacity, you can see unwanted silence inserted into the audio signal. The problem is the number of samples per frame being fed to the encoder.
Different codecs expect different frame sizes for encoding, and the AAC encoder expects a frame size of 1024. This can be seen by inspecting enc_ctx->frame_size after calling avcodec_open2().
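For example, using the variables from the code in the question, this can be checked with a couple of lines right after the encoder is opened (a minimal sketch, not part of the original program):

```cpp
// Sketch: after the encoder has been opened with avcodec_open2(),
// frame_size holds the number of samples per channel it expects in
// each input frame; for the AAC encoder this is typically 1024.
std::cout << "Encoder expects " << enc_ctx->frame_size
          << " samples per channel per frame" << std::endl;
```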
The filter therefore needs to feed the encoder frames of 1024 samples per channel.
So in my code, pFrameFiltered needs to contain 1024 samples per channel. If it has fewer than 1024, the encoder pads it with zeros up to 1024 samples and then encodes it.
This can be solved either by maintaining our own FIFO queue or by using one of the audio filters that ffmpeg provides. We need to use the filter asetnsamples=n=1024:p=0, as described here. So the required change is:
```cpp
string filter_description =
    "aresample=22050,aformat=sample_fmts=s16:channel_layouts=mono,asetnsamples=n=1024:p=0";
```
Just play around with the value of n in the filter to understand it better. Check the enc_ctx->frame_size field set by avcodec_open2() and set the value of n accordingly.
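If you would rather not hard-code 1024, one possible variation is to build the filter string from enc_ctx->frame_size. This is only a sketch; it assumes the assignment runs after avcodec_open2() on the encoder and before initialize_audio_filter(), unlike the file-scope initializer in the original code:

```cpp
// Sketch: derive the asetnsamples value from the encoder's frame_size
// instead of hard-coding 1024. Requires the encoder context (enc_ctx)
// to already be opened when this line executes.
filter_description =
    "aresample=22050,aformat=sample_fmts=s16:channel_layouts=mono,"
    "asetnsamples=n=" + std::to_string(enc_ctx->frame_size) + ":p=0";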