From 94f3a5771b3261d045b08845b79de001a272efdc Mon Sep 17 00:00:00 2001 From: Isaac Connor Date: Wed, 4 Feb 2026 22:51:21 -0500 Subject: [PATCH] fix: release GPU surfaces immediately after hw transfer The nvidia-vaapi-driver would fail with "list argument exceeds maximum number" when decoding HEVC because GPU surfaces were being held in the packet queue after transfer, exhausting the VAAPI surface pool. Changes: - Transfer hw frames to software immediately in receive_frame() while the VA context is still valid, then release the GPU surface - Check hw_frames_ctx in needs_hw_transfer() to detect already-transferred frames - Remove extra_hw_frames and thread_count settings (not needed with immediate surface release) - Fix EAGAIN handling in send_packet to wait instead of busy-loop Co-Authored-By: Claude Opus 4.5 --- src/zm_ffmpeg_camera.cpp | 4 --- src/zm_monitor.cpp | 5 ++-- src/zm_packet.cpp | 56 ++++++++++++++++++++++++++++++++++++---- 3 files changed, 54 insertions(+), 11 deletions(-) diff --git a/src/zm_ffmpeg_camera.cpp b/src/zm_ffmpeg_camera.cpp index 339e8e56a..38c0f06a5 100644 --- a/src/zm_ffmpeg_camera.cpp +++ b/src/zm_ffmpeg_camera.cpp @@ -514,10 +514,6 @@ int FfmpegCamera::OpenFfmpeg() { mVideoCodecContext->opaque = &hw_pix_fmt; mVideoCodecContext->get_format = get_hw_format; mVideoCodecContext->hw_device_ctx = av_buffer_ref(hw_device_ctx); - // Allocate extra surfaces for reference frames - HEVC can use up to 16 - mVideoCodecContext->extra_hw_frames = 16; - // Use single-threaded decoding for hardware decoders - mVideoCodecContext->thread_count = 1; } } else { Debug(1, "Failed to find suitable hw_pix_fmt."); diff --git a/src/zm_monitor.cpp b/src/zm_monitor.cpp index 1e76ad100..a50052965 100644 --- a/src/zm_monitor.cpp +++ b/src/zm_monitor.cpp @@ -2960,8 +2960,9 @@ bool Monitor::Decode() { if (ret == 0) { // EAGAIN - decoder's input buffer is full, need to drain first - Debug(2, "send_packet returned EAGAIN, will retry"); - return true; + // Return false to let caller wait before retrying + Debug(2, "send_packet returned EAGAIN, waiting before retry"); + return false; } else if (ret < 0) { // Error Debug(1, "send_packet failed: %d", ret); diff --git a/src/zm_packet.cpp b/src/zm_packet.cpp index e85592fd2..95edaf49e 100644 --- a/src/zm_packet.cpp +++ b/src/zm_packet.cpp @@ -113,7 +113,7 @@ int ZMPacket::send_packet(AVCodecContext *ctx) { return ret; } } - Debug(1, "Ret from send_packet %d %s, packet %d", ret, av_make_error_string(ret).c_str(), image_index); + Debug(3, "Ret from send_packet %d %s, packet %d", ret, av_make_error_string(ret).c_str(), image_index); return 1; } @@ -135,8 +135,36 @@ int ZMPacket::receive_frame(AVCodecContext *ctx) { } } + // For hardware frames, do the transfer immediately while the context is valid + // The nvidia-vaapi-driver can have issues if there's a delay between decode and transfer +#if HAVE_LIBAVUTIL_HWCONTEXT_H +#if LIBAVCODEC_VERSION_CHECK(57, 89, 0, 89, 0) + if (receive_frame->hw_frames_ctx) { + Debug(2, "Hardware frame received, transferring immediately"); + av_frame_ptr sw_frame{av_frame_alloc()}; + ret = av_hwframe_transfer_data(sw_frame.get(), receive_frame.get(), 0); + if (ret < 0) { + Error("Immediate hw transfer failed: %s, packet %d", av_make_error_string(ret).c_str(), image_index); + return ret; + } + ret = av_frame_copy_props(sw_frame.get(), receive_frame.get()); + if (ret < 0) { + Warning("Failed to copy frame props: %s", av_make_error_string(ret).c_str()); + } + // Release GPU surface immediately - we have the software frame now + // Keeping hw_frame would hold GPU memory and exhaust the surface pool + // receive_frame goes out of scope here and releases the surface + in_frame = std::move(sw_frame); + zm_dump_video_frame(in_frame.get(), "After immediate hwtransfer"); + } else { + in_frame = std::move(receive_frame); + } +#else in_frame = std::move(receive_frame); - //zm_dump_video_frame(in_frame.get(), "got frame"); +#endif +#else + in_frame = std::move(receive_frame); +#endif return 1; } // end int ZMPacket::receive_frame(AVCodecContext *ctx) @@ -148,6 +176,10 @@ bool ZMPacket::needs_hw_transfer(AVCodecContext *ctx) { } #if HAVE_LIBAVUTIL_HWCONTEXT_H #if LIBAVCODEC_VERSION_CHECK(57, 89, 0, 89, 0) + // If frame has no hw_frames_ctx, it's already a software frame + if (!in_frame->hw_frames_ctx) { + return false; + } if ( (ctx->sw_pix_fmt != AV_PIX_FMT_NONE) and @@ -162,8 +194,9 @@ bool ZMPacket::needs_hw_transfer(AVCodecContext *ctx) { int ZMPacket::transfer_hwframe(AVCodecContext *ctx) { if (hw_frame) { - Error("Already have hw_frame in get_hwframe"); - return 0; + // Already transferred in receive_frame + Debug(2, "Hardware frame already transferred"); + return 1; } #if HAVE_LIBAVUTIL_HWCONTEXT_H #if LIBAVCODEC_VERSION_CHECK(57, 89, 0, 89, 0) @@ -182,11 +215,24 @@ int ZMPacket::transfer_hwframe(AVCodecContext *ctx) { hw_frame = std::move(in_frame); zm_dump_video_frame(hw_frame.get(), "Before hwtransfer"); + // Verify hw_frames_ctx is valid before attempting transfer + if (!hw_frame->hw_frames_ctx) { + Error("Hardware frame has no hw_frames_ctx, cannot transfer"); + hw_frame = nullptr; + in_frame = nullptr; + return -1; + } + av_frame_ptr new_frame{av_frame_alloc()}; + // Don't set format - let FFmpeg use the hw_frames_ctx default format + // (nvidia-vaapi-driver only supports nv12/p010 transfer, not yuvj420p) + // The later swscale conversion will handle format conversion + /* retrieve data from GPU to CPU */ int ret = av_hwframe_transfer_data(new_frame.get(), hw_frame.get(), 0); if (ret < 0) { - Error("Unable to transfer frame: %s, continuing", av_make_error_string(ret).c_str()); + Error("Unable to transfer frame: %s", av_make_error_string(ret).c_str()); + hw_frame = nullptr; in_frame = nullptr; return ret; }