From 8ccad6937177b1b92e40ab8f4447ea27bac009a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Lalinsk=C3=BD?= Date: Fri, 4 Nov 2022 21:47:38 +0100 Subject: [PATCH] Use FFmpeg 5.x (#120) * Use FFmpeg 5.1.2 for CI builds * Build on Ubuntu 20.04 * Upgrade code to FFmpeg 5.x APIs * Only set FFmpeg include dirs if building tools * No longer needed * Use ubuntu 20.04 --- .github/workflows/build.yml | 6 +- CMakeLists.txt | 16 -- package/build.sh | 4 +- src/audio/ffmpeg_audio_processor.h | 2 - src/audio/ffmpeg_audio_processor_avresample.h | 72 ------- src/audio/ffmpeg_audio_processor_swresample.h | 18 +- src/audio/ffmpeg_audio_reader.h | 197 +++++++++--------- tests/CMakeLists.txt | 6 + 8 files changed, 122 insertions(+), 199 deletions(-) delete mode 100644 src/audio/ffmpeg_audio_processor_avresample.h diff --git a/CMakeLists.txt b/CMakeLists.txt index f8d6a32..4da2405 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -84,9 +84,6 @@ find_package(FFmpeg) if(FFMPEG_LIBRARIES) cmake_push_check_state(RESET) set(CMAKE_REQUIRED_LIBRARIES ${FFMPEG_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT} -lm) - check_function_exists(av_packet_unref HAVE_AV_PACKET_UNREF) - check_function_exists(av_frame_alloc HAVE_AV_FRAME_ALLOC) - check_function_exists(av_frame_free HAVE_AV_FRAME_FREE) cmake_pop_check_state() endif() @@ -163,14 +160,11 @@ message(STATUS "Using ${FFT_LIB} for FFT calculations") if(NOT AUDIO_PROCESSOR_LIB) if(FFMPEG_LIBSWRESAMPLE_FOUND) set(AUDIO_PROCESSOR_LIB "swresample") - elseif(FFMPEG_LIBAVRESAMPLE_FOUND) - set(AUDIO_PROCESSOR_LIB "avresample") endif() endif() if(AUDIO_PROCESSOR_LIB STREQUAL "swresample") if(FFMPEG_LIBSWRESAMPLE_FOUND) - set(USE_AVRESAMPLE OFF) set(USE_SWRESAMPLE ON) set(AUDIO_PROCESSOR_LIBRARIES ${FFMPEG_LIBSWRESAMPLE_LIBRARIES}) set(AUDIO_PROCESSOR_INCLUDE_DIRS ${FFMPEG_LIBSWRESAMPLE_INCLUDE_DIRS}) @@ -178,16 +172,6 @@ if(AUDIO_PROCESSOR_LIB STREQUAL "swresample") message(FATAL_ERROR "Selected ${AUDIO_PROCESSOR_LIB} for audio processing, but the library is not found") endif() message(STATUS "Using ${AUDIO_PROCESSOR_LIB} for audio conversion") -elseif(AUDIO_PROCESSOR_LIB STREQUAL "avresample") - if(FFMPEG_LIBAVRESAMPLE_FOUND) - set(USE_AVRESAMPLE ON) - set(USE_SWRESAMPLE OFF) - set(AUDIO_PROCESSOR_LIBRARIES ${FFMPEG_LIBAVRESAMPLE_LIBRARIES}) - set(AUDIO_PROCESSOR_INCLUDE_DIRS ${FFMPEG_LIBAVRESAMPLE_INCLUDE_DIRS}) - else() - message(FATAL_ERROR "Selected ${AUDIO_PROCESSOR_LIB} for audio processing, but the library is not found") - endif() - message(STATUS "Using ${AUDIO_PROCESSOR_LIB} for audio conversion") else() message(STATUS "Building without audio conversion support, please install FFmpeg with libswresample") endif() diff --git a/package/build.sh b/package/build.sh index da631ae..b41d36e 100755 --- a/package/build.sh +++ b/package/build.sh @@ -7,8 +7,8 @@ set -eux BASE_DIR=$(cd $(dirname $0)/.. && pwd) -FFMPEG_VERSION=4.4.1 -FFMPEG_BUILD_TAG=v4.4.1-1 +FFMPEG_VERSION=5.1.2 +FFMPEG_BUILD_TAG=v${FFMPEG_VERSION}-1 TMP_BUILD_DIR=$BASE_DIR/$(mktemp -d build.XXXXXXXX) trap 'rm -rf $TMP_BUILD_DIR' EXIT diff --git a/src/audio/ffmpeg_audio_processor.h b/src/audio/ffmpeg_audio_processor.h index 7628fc7..39f4f6d 100644 --- a/src/audio/ffmpeg_audio_processor.h +++ b/src/audio/ffmpeg_audio_processor.h @@ -10,8 +10,6 @@ #if defined(USE_SWRESAMPLE) #include "audio/ffmpeg_audio_processor_swresample.h" -#elif defined(USE_AVRESAMPLE) -#include "audio/ffmpeg_audio_processor_avresample.h" #else #error "no audio processing library" #endif diff --git a/src/audio/ffmpeg_audio_processor_avresample.h b/src/audio/ffmpeg_audio_processor_avresample.h deleted file mode 100644 index bd85f92..0000000 --- a/src/audio/ffmpeg_audio_processor_avresample.h +++ /dev/null @@ -1,72 +0,0 @@ -// Copyright (C) 2016 Lukas Lalinsky -// Distributed under the MIT license, see the LICENSE file for details. - -#ifndef CHROMAPRINT_AUDIO_FFMPEG_AUDIO_PROCESSOR_AVRESAMPLE_H_ -#define CHROMAPRINT_AUDIO_FFMPEG_AUDIO_PROCESSOR_AVRESAMPLE_H_ - -extern "C" { -#include -} - -namespace chromaprint { - -class FFmpegAudioProcessor { -public: - FFmpegAudioProcessor() { - m_resample_ctx = avresample_alloc_context(); - } - - ~FFmpegAudioProcessor() { - avresample_free(&m_resample_ctx); - } - - void SetCompatibleMode() { - av_opt_set_int(m_resample_ctx, "filter_size", 16, 0); - av_opt_set_int(m_resample_ctx, "phase_shift", 8, 0); - av_opt_set_int(m_resample_ctx, "linear_interp", 1, 0); - av_opt_set_double(m_resample_ctx, "cutoff", 0.8, 0); - } - - void SetInputChannelLayout(int64_t channel_layout) { - av_opt_set_int(m_resample_ctx, "in_channel_layout", channel_layout, 0); - } - - void SetInputSampleFormat(AVSampleFormat sample_format) { - av_opt_set_int(m_resample_ctx, "in_sample_fmt", sample_format, 0); - } - - void SetInputSampleRate(int sample_rate) { - av_opt_set_int(m_resample_ctx, "in_sample_rate", sample_rate, 0); - } - - void SetOutputChannelLayout(int64_t channel_layout) { - av_opt_set_int(m_resample_ctx, "out_channel_layout", channel_layout, 0); - } - - void SetOutputSampleFormat(AVSampleFormat sample_format) { - av_opt_set_int(m_resample_ctx, "out_sample_fmt", sample_format, 0); - } - - void SetOutputSampleRate(int sample_rate) { - av_opt_set_int(m_resample_ctx, "out_sample_fmt", sample_rate, 0); - } - - int Init() { - return avresample_open(m_resample_ctx); - } - - int Convert(uint8_t **out, int out_count, const uint8_t **in, int in_count) { - return avresample_convert(m_resample_ctx, out, 0, out_count, (uint8_t **) in, 0, in_count); - } - - int Flush(uint8_t **out, int out_count) { - return avresample_read(m_resample_ctx, out, out_count); - } - -private: - AVAudioResampleContext *m_resample_ctx = nullptr; -}; - -}; // namespace chromaprint - -#endif diff --git a/src/audio/ffmpeg_audio_processor_swresample.h b/src/audio/ffmpeg_audio_processor_swresample.h index b86266b..b1d4bea 100644 --- a/src/audio/ffmpeg_audio_processor_swresample.h +++ b/src/audio/ffmpeg_audio_processor_swresample.h @@ -28,30 +28,28 @@ class FFmpegAudioProcessor { av_opt_set_double(m_swr_ctx, "cutoff", 0.8, 0); } - void SetInputChannelLayout(int64_t channel_layout) { - av_opt_set_int(m_swr_ctx, "icl", channel_layout, 0); - av_opt_set_int(m_swr_ctx, "ich", av_get_channel_layout_nb_channels(channel_layout), 0); + void SetInputChannelLayout(AVChannelLayout *channel_layout) { + av_opt_set_int(m_swr_ctx, "in_channel_layout", channel_layout->u.mask, 0); } void SetInputSampleFormat(AVSampleFormat sample_format) { - av_opt_set_int(m_swr_ctx, "isf", sample_format, 0); + av_opt_set_sample_fmt(m_swr_ctx, "in_sample_fmt", sample_format, 0); } void SetInputSampleRate(int sample_rate) { - av_opt_set_int(m_swr_ctx, "isr", sample_rate, 0); + av_opt_set_int(m_swr_ctx, "in_sample_rate", sample_rate, 0); } - void SetOutputChannelLayout(int64_t channel_layout) { - av_opt_set_int(m_swr_ctx, "ocl", channel_layout, 0); - av_opt_set_int(m_swr_ctx, "och", av_get_channel_layout_nb_channels(channel_layout), 0); + void SetOutputChannelLayout(AVChannelLayout *channel_layout) { + av_opt_set_int(m_swr_ctx, "out_channel_layout", channel_layout->u.mask, 0); } void SetOutputSampleFormat(AVSampleFormat sample_format) { - av_opt_set_int(m_swr_ctx, "osf", sample_format, 0); + av_opt_set_sample_fmt(m_swr_ctx, "out_sample_fmt", sample_format, 0); } void SetOutputSampleRate(int sample_rate) { - av_opt_set_int(m_swr_ctx, "osr", sample_rate, 0); + av_opt_set_int(m_swr_ctx, "out_sample_rate", sample_rate, 0); } int Init() { diff --git a/src/audio/ffmpeg_audio_reader.h b/src/audio/ffmpeg_audio_reader.h index 5550164..1c6b346 100644 --- a/src/audio/ffmpeg_audio_reader.h +++ b/src/audio/ffmpeg_audio_reader.h @@ -62,7 +62,7 @@ class FFmpegAudioReader { bool Read(const int16_t **data, size_t *size); bool IsOpen() const { return m_opened; } - bool IsFinished() const { return m_finished && !m_got_frame; } + bool IsFinished() const { return !m_has_more_packets && !m_has_more_frames; } std::string GetError() const { return m_error; } int GetErrorCode() const { return m_error_code; } @@ -74,20 +74,19 @@ class FFmpegAudioReader { uint8_t *m_convert_buffer[1] = { nullptr }; int m_convert_buffer_nb_samples = 0; - AVInputFormat *m_input_fmt = nullptr; + const AVInputFormat *m_input_fmt = nullptr; AVDictionary *m_input_opts = nullptr; AVFormatContext *m_format_ctx = nullptr; AVCodecContext *m_codec_ctx = nullptr; - AVFrame *m_frame = nullptr; int m_stream_index = -1; std::string m_error; int m_error_code = 0; - bool m_finished = false; bool m_opened = false; - int m_got_frame = 0; - AVPacket m_packet; - AVPacket m_packet0; + bool m_has_more_packets = true; + bool m_has_more_frames = true; + AVPacket *m_packet = nullptr; + AVFrame *m_frame = nullptr; int m_output_sample_rate = 0; int m_output_channels = 0; @@ -98,19 +97,12 @@ class FFmpegAudioReader { inline FFmpegAudioReader::FFmpegAudioReader() { av_log_set_level(AV_LOG_QUIET); - - av_init_packet(&m_packet); - m_packet.data = nullptr; - m_packet.size = 0; - - m_packet0 = m_packet; } inline FFmpegAudioReader::~FFmpegAudioReader() { Close(); av_dict_free(&m_input_opts); av_freep(&m_convert_buffer[0]); - av_packet_unref(&m_packet0); } inline bool FFmpegAudioReader::SetInputFormat(const char *name) { @@ -135,11 +127,10 @@ inline bool FFmpegAudioReader::Open(const std::string &file_name) { Close(); - av_init_packet(&m_packet); - m_packet.data = nullptr; - m_packet.size = 0; - - m_packet0 = m_packet; + m_packet = av_packet_alloc(); + if (!m_packet) { + return false; + } ret = avformat_open_input(&m_format_ctx, file_name.c_str(), m_input_fmt, &m_input_opts); if (ret < 0) { @@ -153,26 +144,31 @@ inline bool FFmpegAudioReader::Open(const std::string &file_name) { return false; } - AVCodec *codec; + const AVCodec *codec; ret = av_find_best_stream(m_format_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, &codec, 0); if (ret < 0) { SetError("Could not find any audio stream in the file", ret); return false; } m_stream_index = ret; + auto stream = m_format_ctx->streams[m_stream_index]; - m_codec_ctx = m_format_ctx->streams[m_stream_index]->codec; + m_codec_ctx = avcodec_alloc_context3(codec); m_codec_ctx->request_sample_fmt = AV_SAMPLE_FMT_S16; + ret = avcodec_parameters_to_context(m_codec_ctx, stream->codecpar); + if (ret < 0) { + SetError("Could not copy the stream parameters", ret); + return false; + } + ret = avcodec_open2(m_codec_ctx, codec, nullptr); if (ret < 0) { SetError("Could not open the codec", ret); return false; } - if (!m_codec_ctx->channel_layout) { - m_codec_ctx->channel_layout = av_get_default_channel_layout(m_codec_ctx->channels); - } + av_dump_format(m_format_ctx, 0, "foo", 0); m_frame = av_frame_alloc(); if (!m_frame) { @@ -183,19 +179,23 @@ inline bool FFmpegAudioReader::Open(const std::string &file_name) { m_output_sample_rate = m_codec_ctx->sample_rate; } - if (!m_output_channels) { - m_output_channels = m_codec_ctx->channels; + AVChannelLayout output_channel_layout; + if (m_output_channels) { + av_channel_layout_default(&output_channel_layout, m_output_channels); + } else { + m_output_channels = m_codec_ctx->ch_layout.nb_channels; + av_channel_layout_default(&output_channel_layout, m_output_channels); } - if (m_codec_ctx->sample_fmt != AV_SAMPLE_FMT_S16 || m_codec_ctx->channels != m_output_channels || m_codec_ctx->sample_rate != m_output_sample_rate) { + if (m_codec_ctx->sample_fmt != AV_SAMPLE_FMT_S16 || m_codec_ctx->ch_layout.nb_channels != m_output_channels || m_codec_ctx->sample_rate != m_output_sample_rate) { m_converter.reset(new FFmpegAudioProcessor()); m_converter->SetCompatibleMode(); m_converter->SetInputSampleFormat(m_codec_ctx->sample_fmt); m_converter->SetInputSampleRate(m_codec_ctx->sample_rate); - m_converter->SetInputChannelLayout(m_codec_ctx->channel_layout); + m_converter->SetInputChannelLayout(&(m_codec_ctx->ch_layout)); m_converter->SetOutputSampleFormat(AV_SAMPLE_FMT_S16); m_converter->SetOutputSampleRate(m_output_sample_rate); - m_converter->SetOutputChannelLayout(av_get_default_channel_layout(m_output_channels)); + m_converter->SetOutputChannelLayout(&output_channel_layout); auto ret = m_converter->Init(); if (ret != 0) { SetError("Could not create an audio converter instance", ret); @@ -203,10 +203,11 @@ inline bool FFmpegAudioReader::Open(const std::string &file_name) { } } + av_channel_layout_uninit(&output_channel_layout); + m_opened = true; - m_finished = false; - m_got_frame = 0; - m_nb_packets = 0; + m_has_more_packets = true; + m_has_more_frames = true; m_decode_error = 0; return true; @@ -214,6 +215,7 @@ inline bool FFmpegAudioReader::Open(const std::string &file_name) { inline void FFmpegAudioReader::Close() { av_frame_free(&m_frame); + av_packet_free(&m_packet); m_stream_index = -1; @@ -252,91 +254,98 @@ inline bool FFmpegAudioReader::Read(const int16_t **data, size_t *size) { return false; } + *data = nullptr; + *size = 0; + int ret; + bool needs_packet = false; while (true) { - while (m_packet.size <= 0) { - av_packet_unref(&m_packet0); - av_init_packet(&m_packet); - m_packet.data = nullptr; - m_packet.size = 0; - ret = av_read_frame(m_format_ctx, &m_packet); + while (needs_packet && m_packet->size == 0) { + ret = av_read_frame(m_format_ctx, m_packet); if (ret < 0) { if (ret == AVERROR_EOF) { - m_finished = true; + needs_packet = false; + m_has_more_packets = false; break; - } else { + } + SetError("Error reading from the audio source", ret); + return false; + } + if (m_packet->stream_index == m_stream_index) { + needs_packet = false; + } else { + av_packet_unref(m_packet); + } + } + + if (m_packet->size != 0) { + ret = avcodec_send_packet(m_codec_ctx, m_packet); + if (ret < 0) { + if (ret != AVERROR(EAGAIN)) { SetError("Error reading from the audio source", ret); return false; } - } - m_packet0 = m_packet; - if (m_packet.stream_index != m_stream_index) { - m_packet.data = nullptr; - m_packet.size = 0; } else { - m_nb_packets++; + av_packet_unref(m_packet); } } - ret = avcodec_decode_audio4(m_codec_ctx, m_frame, &m_got_frame, &m_packet); + ret = avcodec_receive_frame(m_codec_ctx, m_frame); if (ret < 0) { - if (m_decode_error) { - SetError("Error decoding audio frame", m_decode_error); - return false; + if (ret == AVERROR_EOF) { + m_has_more_frames = false; + } else if (ret == AVERROR(EAGAIN)) { + if (m_has_more_packets) { + needs_packet = true; + continue; + } else { + m_has_more_frames = false; + } } - m_decode_error = ret; - m_packet.data = nullptr; - m_packet.size = 0; - continue; + SetError("Error decoding the audio source", ret); + return false; } - break; - } - - m_decode_error = 0; - - const int decoded = std::min(ret, m_packet.size); - m_packet.data += decoded; - m_packet.size -= decoded; - - if (m_got_frame) { - if (m_converter) { - if (m_frame->nb_samples > m_convert_buffer_nb_samples) { - int linsize; - av_freep(&m_convert_buffer[0]); - m_convert_buffer_nb_samples = std::max(1024 * 8, m_frame->nb_samples); - ret = av_samples_alloc(m_convert_buffer, &linsize, m_codec_ctx->channels, m_convert_buffer_nb_samples, AV_SAMPLE_FMT_S16, 1); - if (ret < 0) { - SetError("Couldn't allocate audio converter buffer", ret); + if (m_frame->nb_samples > 0) { + if (m_converter) { + if (m_frame->nb_samples > m_convert_buffer_nb_samples) { + int linsize; + av_freep(&m_convert_buffer[0]); + m_convert_buffer_nb_samples = std::max(1024 * 8, m_frame->nb_samples); + ret = av_samples_alloc(m_convert_buffer, &linsize, m_codec_ctx->ch_layout.nb_channels, m_convert_buffer_nb_samples, AV_SAMPLE_FMT_S16, 1); + if (ret < 0) { + SetError("Couldn't allocate audio converter buffer", ret); + return false; + } + } + auto nb_samples = m_converter->Convert(m_convert_buffer, m_convert_buffer_nb_samples, (const uint8_t **) m_frame->data, m_frame->nb_samples); + if (nb_samples < 0) { + SetError("Couldn't convert audio", ret); return false; } - } - auto nb_samples = m_converter->Convert(m_convert_buffer, m_convert_buffer_nb_samples, (const uint8_t **) m_frame->data, m_frame->nb_samples); - if (nb_samples < 0) { - SetError("Couldn't convert audio", ret); - return false; - } - *data = (const int16_t *) m_convert_buffer[0]; - *size = nb_samples; - } else { - *data = (const int16_t *) m_frame->data[0]; - *size = m_frame->nb_samples; - } - } else { - if (m_finished && m_converter) { - auto nb_samples = m_converter->Flush(m_convert_buffer, m_convert_buffer_nb_samples); - if (nb_samples < 0) { - SetError("Couldn't convert audio", ret); - return false; - } else if (nb_samples > 0) { - m_got_frame = 1; *data = (const int16_t *) m_convert_buffer[0]; *size = nb_samples; + } else { + *data = (const int16_t *) m_frame->data[0]; + *size = m_frame->nb_samples; + } + } else { + if (m_converter) { + if (IsFinished()) { + auto nb_samples = m_converter->Flush(m_convert_buffer, m_convert_buffer_nb_samples); + if (nb_samples < 0) { + SetError("Couldn't convert audio", ret); + return false; + } else if (nb_samples > 0) { + *data = (const int16_t *) m_convert_buffer[0]; + *size = nb_samples; + } + } } } - } - return true; + return true; + } } inline void FFmpegAudioReader::SetError(const char *message, int errnum) { diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index a2b517b..123e643 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -38,6 +38,12 @@ set(SRCS if(BUILD_TOOLS) set(SRCS ${SRCS} ../src/audio/ffmpeg_audio_reader_test.cpp) + include_directories( + ${FFMPEG_LIBAVFORMAT_INCLUDE_DIRS} + ${FFMPEG_LIBAVCODEC_INCLUDE_DIRS} + ${FFMPEG_LIBAVUTIL_INCLUDE_DIRS} + ${AUDIO_PROCESSOR_INCLUDE_DIRS} + ) link_libraries(fpcalc_libs) endif()