fix: release GPU surfaces immediately after hw transfer

The nvidia-vaapi-driver would fail with "list argument exceeds maximum
number" when decoding HEVC because GPU surfaces were being held in the
packet queue after transfer, exhausting the VAAPI surface pool.

Changes:
- Transfer hw frames to software immediately in receive_frame() while
  the VA context is still valid, then release the GPU surface
- Check hw_frames_ctx in needs_hw_transfer() to detect already-transferred
  frames
- Remove extra_hw_frames and thread_count settings (not needed with
  immediate surface release)
- Fix EAGAIN handling in Monitor::Decode(): when send_packet reports EAGAIN,
  return false so the caller waits before retrying instead of busy-looping

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
pull/4596/head
Isaac Connor 2026-02-04 22:51:21 -05:00
parent c012f55962
commit 94f3a5771b
3 changed files with 54 additions and 11 deletions

View File

@ -514,10 +514,6 @@ int FfmpegCamera::OpenFfmpeg() {
mVideoCodecContext->opaque = &hw_pix_fmt;
mVideoCodecContext->get_format = get_hw_format;
mVideoCodecContext->hw_device_ctx = av_buffer_ref(hw_device_ctx);
// Allocate extra surfaces for reference frames - HEVC can use up to 16
mVideoCodecContext->extra_hw_frames = 16;
// Use single-threaded decoding for hardware decoders
mVideoCodecContext->thread_count = 1;
}
} else {
Debug(1, "Failed to find suitable hw_pix_fmt.");

View File

@ -2960,8 +2960,9 @@ bool Monitor::Decode() {
if (ret == 0) {
// EAGAIN - decoder's input buffer is full, need to drain first
Debug(2, "send_packet returned EAGAIN, will retry");
return true;
// Return false to let caller wait before retrying
Debug(2, "send_packet returned EAGAIN, waiting before retry");
return false;
} else if (ret < 0) {
// Error
Debug(1, "send_packet failed: %d", ret);

View File

@ -113,7 +113,7 @@ int ZMPacket::send_packet(AVCodecContext *ctx) {
return ret;
}
}
Debug(1, "Ret from send_packet %d %s, packet %d", ret, av_make_error_string(ret).c_str(), image_index);
Debug(3, "Ret from send_packet %d %s, packet %d", ret, av_make_error_string(ret).c_str(), image_index);
return 1;
}
@ -135,8 +135,36 @@ int ZMPacket::receive_frame(AVCodecContext *ctx) {
}
}
// For hardware frames, do the transfer immediately while the context is valid
// The nvidia-vaapi-driver can have issues if there's a delay between decode and transfer
#if HAVE_LIBAVUTIL_HWCONTEXT_H
#if LIBAVCODEC_VERSION_CHECK(57, 89, 0, 89, 0)
if (receive_frame->hw_frames_ctx) {
Debug(2, "Hardware frame received, transferring immediately");
av_frame_ptr sw_frame{av_frame_alloc()};
ret = av_hwframe_transfer_data(sw_frame.get(), receive_frame.get(), 0);
if (ret < 0) {
Error("Immediate hw transfer failed: %s, packet %d", av_make_error_string(ret).c_str(), image_index);
return ret;
}
ret = av_frame_copy_props(sw_frame.get(), receive_frame.get());
if (ret < 0) {
Warning("Failed to copy frame props: %s", av_make_error_string(ret).c_str());
}
// Release GPU surface immediately - we have the software frame now
// Keeping hw_frame would hold GPU memory and exhaust the surface pool
// receive_frame goes out of scope here and releases the surface
in_frame = std::move(sw_frame);
zm_dump_video_frame(in_frame.get(), "After immediate hwtransfer");
} else {
in_frame = std::move(receive_frame);
}
#else
in_frame = std::move(receive_frame);
//zm_dump_video_frame(in_frame.get(), "got frame");
#endif
#else
in_frame = std::move(receive_frame);
#endif
return 1;
} // end int ZMPacket::receive_frame(AVCodecContext *ctx)
@ -148,6 +176,10 @@ bool ZMPacket::needs_hw_transfer(AVCodecContext *ctx) {
}
#if HAVE_LIBAVUTIL_HWCONTEXT_H
#if LIBAVCODEC_VERSION_CHECK(57, 89, 0, 89, 0)
// If frame has no hw_frames_ctx, it's already a software frame
if (!in_frame->hw_frames_ctx) {
return false;
}
if (
(ctx->sw_pix_fmt != AV_PIX_FMT_NONE)
and
@ -162,8 +194,9 @@ bool ZMPacket::needs_hw_transfer(AVCodecContext *ctx) {
int ZMPacket::transfer_hwframe(AVCodecContext *ctx) {
if (hw_frame) {
Error("Already have hw_frame in get_hwframe");
return 0;
// Already transferred in receive_frame
Debug(2, "Hardware frame already transferred");
return 1;
}
#if HAVE_LIBAVUTIL_HWCONTEXT_H
#if LIBAVCODEC_VERSION_CHECK(57, 89, 0, 89, 0)
@ -182,11 +215,24 @@ int ZMPacket::transfer_hwframe(AVCodecContext *ctx) {
hw_frame = std::move(in_frame);
zm_dump_video_frame(hw_frame.get(), "Before hwtransfer");
// Verify hw_frames_ctx is valid before attempting transfer
if (!hw_frame->hw_frames_ctx) {
Error("Hardware frame has no hw_frames_ctx, cannot transfer");
hw_frame = nullptr;
in_frame = nullptr;
return -1;
}
av_frame_ptr new_frame{av_frame_alloc()};
// Don't set format - let FFmpeg use the hw_frames_ctx default format
// (nvidia-vaapi-driver only supports nv12/p010 transfer, not yuvj420p)
// The later swscale conversion will handle format conversion
/* retrieve data from GPU to CPU */
int ret = av_hwframe_transfer_data(new_frame.get(), hw_frame.get(), 0);
if (ret < 0) {
Error("Unable to transfer frame: %s, continuing", av_make_error_string(ret).c_str());
Error("Unable to transfer frame: %s", av_make_error_string(ret).c_str());
hw_frame = nullptr;
in_frame = nullptr;
return ret;
}