fix: release GPU surfaces immediately after hw transfer
The nvidia-vaapi-driver would fail with "list argument exceeds maximum number" when decoding HEVC because GPU surfaces were being held in the packet queue after transfer, exhausting the VAAPI surface pool.

Changes:
- Transfer hw frames to software immediately in receive_frame() while the VA context is still valid, then release the GPU surface
- Check hw_frames_ctx in needs_hw_transfer() to detect already-transferred frames
- Remove extra_hw_frames and thread_count settings (not needed with immediate surface release)
- Fix EAGAIN handling in send_packet to wait instead of busy-loop

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
pull/4596/head
parent
c012f55962
commit
94f3a5771b
|
|
@ -514,10 +514,6 @@ int FfmpegCamera::OpenFfmpeg() {
|
|||
mVideoCodecContext->opaque = &hw_pix_fmt;
|
||||
mVideoCodecContext->get_format = get_hw_format;
|
||||
mVideoCodecContext->hw_device_ctx = av_buffer_ref(hw_device_ctx);
|
||||
// Allocate extra surfaces for reference frames - HEVC can use up to 16
|
||||
mVideoCodecContext->extra_hw_frames = 16;
|
||||
// Use single-threaded decoding for hardware decoders
|
||||
mVideoCodecContext->thread_count = 1;
|
||||
}
|
||||
} else {
|
||||
Debug(1, "Failed to find suitable hw_pix_fmt.");
|
||||
|
|
|
|||
|
|
@ -2960,8 +2960,9 @@ bool Monitor::Decode() {
|
|||
|
||||
if (ret == 0) {
|
||||
// EAGAIN - decoder's input buffer is full, need to drain first
|
||||
Debug(2, "send_packet returned EAGAIN, will retry");
|
||||
return true;
|
||||
// Return false to let caller wait before retrying
|
||||
Debug(2, "send_packet returned EAGAIN, waiting before retry");
|
||||
return false;
|
||||
} else if (ret < 0) {
|
||||
// Error
|
||||
Debug(1, "send_packet failed: %d", ret);
|
||||
|
|
|
|||
|
|
@ -113,7 +113,7 @@ int ZMPacket::send_packet(AVCodecContext *ctx) {
|
|||
return ret;
|
||||
}
|
||||
}
|
||||
Debug(1, "Ret from send_packet %d %s, packet %d", ret, av_make_error_string(ret).c_str(), image_index);
|
||||
Debug(3, "Ret from send_packet %d %s, packet %d", ret, av_make_error_string(ret).c_str(), image_index);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
@ -135,8 +135,36 @@ int ZMPacket::receive_frame(AVCodecContext *ctx) {
|
|||
}
|
||||
}
|
||||
|
||||
// For hardware frames, do the transfer immediately while the context is valid
|
||||
// The nvidia-vaapi-driver can have issues if there's a delay between decode and transfer
|
||||
#if HAVE_LIBAVUTIL_HWCONTEXT_H
|
||||
#if LIBAVCODEC_VERSION_CHECK(57, 89, 0, 89, 0)
|
||||
if (receive_frame->hw_frames_ctx) {
|
||||
Debug(2, "Hardware frame received, transferring immediately");
|
||||
av_frame_ptr sw_frame{av_frame_alloc()};
|
||||
ret = av_hwframe_transfer_data(sw_frame.get(), receive_frame.get(), 0);
|
||||
if (ret < 0) {
|
||||
Error("Immediate hw transfer failed: %s, packet %d", av_make_error_string(ret).c_str(), image_index);
|
||||
return ret;
|
||||
}
|
||||
ret = av_frame_copy_props(sw_frame.get(), receive_frame.get());
|
||||
if (ret < 0) {
|
||||
Warning("Failed to copy frame props: %s", av_make_error_string(ret).c_str());
|
||||
}
|
||||
// Release GPU surface immediately - we have the software frame now
|
||||
// Keeping hw_frame would hold GPU memory and exhaust the surface pool
|
||||
// receive_frame goes out of scope here and releases the surface
|
||||
in_frame = std::move(sw_frame);
|
||||
zm_dump_video_frame(in_frame.get(), "After immediate hwtransfer");
|
||||
} else {
|
||||
in_frame = std::move(receive_frame);
|
||||
}
|
||||
#else
|
||||
in_frame = std::move(receive_frame);
|
||||
//zm_dump_video_frame(in_frame.get(), "got frame");
|
||||
#endif
|
||||
#else
|
||||
in_frame = std::move(receive_frame);
|
||||
#endif
|
||||
|
||||
return 1;
|
||||
} // end int ZMPacket::receive_frame(AVCodecContext *ctx)
|
||||
|
|
@ -148,6 +176,10 @@ bool ZMPacket::needs_hw_transfer(AVCodecContext *ctx) {
|
|||
}
|
||||
#if HAVE_LIBAVUTIL_HWCONTEXT_H
|
||||
#if LIBAVCODEC_VERSION_CHECK(57, 89, 0, 89, 0)
|
||||
// If frame has no hw_frames_ctx, it's already a software frame
|
||||
if (!in_frame->hw_frames_ctx) {
|
||||
return false;
|
||||
}
|
||||
if (
|
||||
(ctx->sw_pix_fmt != AV_PIX_FMT_NONE)
|
||||
and
|
||||
|
|
@ -162,8 +194,9 @@ bool ZMPacket::needs_hw_transfer(AVCodecContext *ctx) {
|
|||
|
||||
int ZMPacket::transfer_hwframe(AVCodecContext *ctx) {
|
||||
if (hw_frame) {
|
||||
Error("Already have hw_frame in get_hwframe");
|
||||
return 0;
|
||||
// Already transferred in receive_frame
|
||||
Debug(2, "Hardware frame already transferred");
|
||||
return 1;
|
||||
}
|
||||
#if HAVE_LIBAVUTIL_HWCONTEXT_H
|
||||
#if LIBAVCODEC_VERSION_CHECK(57, 89, 0, 89, 0)
|
||||
|
|
@ -182,11 +215,24 @@ int ZMPacket::transfer_hwframe(AVCodecContext *ctx) {
|
|||
hw_frame = std::move(in_frame);
|
||||
zm_dump_video_frame(hw_frame.get(), "Before hwtransfer");
|
||||
|
||||
// Verify hw_frames_ctx is valid before attempting transfer
|
||||
if (!hw_frame->hw_frames_ctx) {
|
||||
Error("Hardware frame has no hw_frames_ctx, cannot transfer");
|
||||
hw_frame = nullptr;
|
||||
in_frame = nullptr;
|
||||
return -1;
|
||||
}
|
||||
|
||||
av_frame_ptr new_frame{av_frame_alloc()};
|
||||
// Don't set format - let FFmpeg use the hw_frames_ctx default format
|
||||
// (nvidia-vaapi-driver only supports nv12/p010 transfer, not yuvj420p)
|
||||
// The later swscale conversion will handle format conversion
|
||||
|
||||
/* retrieve data from GPU to CPU */
|
||||
int ret = av_hwframe_transfer_data(new_frame.get(), hw_frame.get(), 0);
|
||||
if (ret < 0) {
|
||||
Error("Unable to transfer frame: %s, continuing", av_make_error_string(ret).c_str());
|
||||
Error("Unable to transfer frame: %s", av_make_error_string(ret).c_str());
|
||||
hw_frame = nullptr;
|
||||
in_frame = nullptr;
|
||||
return ret;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue