Add flushing of the resample buffer so the remaining samples get encoded

pull/2706/head
Isaac Connor 2019-09-13 10:55:30 -04:00
parent 16a7ab1392
commit 701aa8d924
5 changed files with 84 additions and 129 deletions

View File

@@ -505,7 +505,10 @@ int zm_receive_packet(AVCodecContext *context, AVPacket &packet) {
#endif
} // end int zm_receive_packet(AVCodecContext *context, AVPacket &packet)
int zm_receive_frame(AVCodecContext *context, AVFrame *frame, AVPacket &packet) {
int zm_send_packet_receive_frame(
AVCodecContext *context,
AVFrame *frame,
AVPacket &packet) {
int ret;
#if LIBAVCODEC_VERSION_CHECK(57, 64, 0, 64, 0)
if ( (ret = avcodec_send_packet(context, &packet)) < 0 ) {
@@ -533,29 +536,31 @@ int zm_receive_frame(AVCodecContext *context, AVFrame *frame, AVPacket &packet)
}
} // end while !frameComplete
#endif
return 0;
} // end int zm_receive_frame(AVCodecContext *context, AVFrame *frame, AVPacket &packet)
return 1;
} // end int zm_send_packet_receive_frame(AVCodecContext *context, AVFrame *frame, AVPacket &packet)
int zm_send_frame(AVCodecContext *ctx, AVFrame *frame, AVPacket &packet) {
/* Returns < 0 on error, 0 if codec not ready, 1 on success
*/
int zm_send_frame_receive_packet(AVCodecContext *ctx, AVFrame *frame, AVPacket &packet) {
int ret;
#if LIBAVCODEC_VERSION_CHECK(57, 64, 0, 64, 0)
if ( (ret = avcodec_send_frame(ctx, frame)) < 0 ) {
Error("Could not send frame (error '%s')",
av_make_error_string(ret).c_str());
zm_av_packet_unref(&packet);
return 0;
return ret;
}
if ( (ret = avcodec_receive_packet(ctx, &packet)) < 0 ) {
if ( AVERROR(EAGAIN) == ret ) {
// The codec may need more samples than it has, perfectly valid
Debug(2, "Codec not ready to give us a packet");
return 0;
} else {
Error("Could not recieve packet (error %d = '%s')", ret,
av_make_error_string(ret).c_str());
}
zm_av_packet_unref(&packet);
return 0;
return ret;
}
#else
int data_present;
@@ -564,7 +569,7 @@ int zm_send_frame(AVCodecContext *ctx, AVFrame *frame, AVPacket &packet) {
Error("Could not encode frame (error '%s')",
av_make_error_string(ret).c_str());
zm_av_packet_unref(&packet);
return 0;
return ret;
}
if ( !data_present ) {
Debug(2, "Not ready to out a frame yet.");
@@ -573,7 +578,7 @@ int zm_send_frame(AVCodecContext *ctx, AVFrame *frame, AVPacket &packet) {
}
#endif
return 1;
} // wend zm_send_frame
} // end int zm_send_frame_receive_packet
void dumpPacket(AVStream *stream, AVPacket *pkt, const char *text) {
char b[10240];

View File

@@ -354,8 +354,8 @@ bool is_audio_context(AVCodec *);
int zm_receive_packet(AVCodecContext *context, AVPacket &packet);
int zm_receive_frame(AVCodecContext *context, AVFrame *frame, AVPacket &packet);
int zm_send_frame(AVCodecContext *context, AVFrame *frame, AVPacket &packet);
int zm_send_packet_receive_frame(AVCodecContext *context, AVFrame *frame, AVPacket &packet);
int zm_send_frame_receive_packet(AVCodecContext *context, AVFrame *frame, AVPacket &packet);
void dumpPacket(AVStream *, AVPacket *,const char *text="");
void dumpPacket(AVPacket *,const char *text="");

View File

@@ -260,8 +260,8 @@ int FfmpegCamera::Capture(Image &image) {
&&
(keyframe || have_video_keyframe)
) {
ret = zm_receive_frame(mVideoCodecContext, mRawFrame, packet);
if ( ret < 0 ) {
ret = zm_send_packet_receive_frame(mVideoCodecContext, mRawFrame, packet);
if ( ret <= 0 ) {
Error("Unable to get frame at frame %d: %s, continuing",
frameCount, av_make_error_string(ret).c_str());
zm_av_packet_unref(&packet);
@@ -952,8 +952,8 @@ int FfmpegCamera::CaptureAndRecord(
}
} // end if keyframe or have_video_keyframe
ret = zm_receive_frame(mVideoCodecContext, mRawFrame, packet);
if ( ret < 0 ) {
ret = zm_send_packet_receive_frame(mVideoCodecContext, mRawFrame, packet);
if ( ret <= 0 ) {
Warning("Unable to receive frame %d: %s. error count is %d",
frameCount, av_make_error_string(ret).c_str(), error_count);
error_count += 1;

View File

@@ -137,8 +137,8 @@ AVFrame *FFmpeg_Input::get_frame(int stream_id) {
} else {
frame = zm_av_frame_alloc();
}
ret = zm_receive_frame(context, frame, packet);
if ( ret < 0 ) {
ret = zm_send_packet_receive_frame(context, frame, packet);
if ( ret <= 0 ) {
Error("Unable to decode frame at frame %d: %s, continuing",
streams[packet.stream_index].frame_count, av_make_error_string(ret).c_str());
zm_av_packet_unref(&packet);

View File

@@ -441,11 +441,39 @@ VideoStore::~VideoStore() {
pkt.size = 0;
av_init_packet(&pkt);
int frame_size = audio_out_ctx->frame_size;
/*
* At the end of the file, we pass the remaining samples to
* the encoder. */
int frame_size = audio_out_ctx->frame_size;
Debug(2, "av_audio_fifo_size = %s", av_audio_fifo_size(fifo));
while ( swr_get_delay(resample_ctx, audio_out_ctx->sample_rate) ) {
swr_convert_frame(resample_ctx, out_frame, NULL);
int ret = av_audio_fifo_realloc(fifo, av_audio_fifo_size(fifo) + out_frame->nb_samples);
if ( ret < 0 ) {
Error("Could not reallocate FIFO to %d samples",
av_audio_fifo_size(fifo) + out_frame->nb_samples);
} else {
/** Store the new samples in the FIFO buffer. */
ret = av_audio_fifo_write(fifo, (void **)out_frame->data, out_frame->nb_samples);
if ( ret < out_frame->nb_samples ) {
Error("Could not write data to FIFO. %d written, expecting %d. Reason %s",
ret, out_frame->nb_samples, av_make_error_string(ret).c_str());
}
// Should probably set the frame size to what is reported FIXME
if ( av_audio_fifo_read(fifo, (void **)out_frame->data, frame_size) ) {
if ( zm_send_frame_receive_packet(audio_out_ctx, out_frame, pkt) ) {
pkt.stream_index = audio_out_stream->index;
av_packet_rescale_ts(&pkt,
audio_out_ctx->time_base,
audio_out_stream->time_base);
write_packet(&pkt, audio_out_stream);
}
} // end if data returned from fifo
}
} // end if have buffered samples in the resampler
Debug(2, "av_audio_fifo_size = %d", av_audio_fifo_size(fifo));
while ( av_audio_fifo_size(fifo) > 0 ) {
/* Take one frame worth of audio samples from the FIFO buffer,
* encode it and write it to the output file. */
@@ -455,13 +483,13 @@ VideoStore::~VideoStore() {
// SHould probably set the frame size to what is reported FIXME
if ( av_audio_fifo_read(fifo, (void **)out_frame->data, frame_size) ) {
if ( zm_send_frame(audio_out_ctx, out_frame, pkt) ) {
if ( zm_send_frame_receive_packet(audio_out_ctx, out_frame, pkt) ) {
pkt.stream_index = audio_out_stream->index;
pkt.duration = av_rescale_q(
pkt.duration,
av_packet_rescale_ts(&pkt,
audio_out_ctx->time_base,
audio_out_stream->time_base);
write_packet(&pkt, audio_out_stream);
}
} // end if data returned from fifo
} // end while still data in the fifo
@@ -472,15 +500,15 @@ VideoStore::~VideoStore() {
#endif
while (1) {
if ( ! zm_receive_packet(audio_out_ctx, pkt) )
if ( ! zm_receive_packet(audio_out_ctx, pkt) ) {
Debug(1, "No more packets");
break;
}
dumpPacket(&pkt, "raw from encoder");
pkt.stream_index = audio_out_stream->index;
av_packet_rescale_ts(&pkt, audio_out_ctx->time_base, audio_out_stream->time_base);
dumpPacket(audio_out_stream, &pkt, "writing flushed packet");
av_interleaved_write_frame(oc, &pkt);
write_packet(&pkt, audio_out_stream);
zm_av_packet_unref(&pkt);
} // while have buffered frames
} // end if audio_out_codec
@@ -858,108 +886,36 @@ bool VideoStore::setup_resampler() {
int VideoStore::writeVideoFramePacket(AVPacket *ipkt) {
av_init_packet(&opkt);
dumpPacket(video_in_stream, ipkt, "input packet");
dumpPacket(video_in_stream, ipkt, "video input packet");
int64_t duration;
if ( ipkt->duration != AV_NOPTS_VALUE ) {
duration = av_rescale_q(
ipkt->duration,
video_in_stream->time_base,
video_out_stream->time_base);
Debug(1, "duration from ipkt: %" PRId64 ") => (%" PRId64 ") (%d/%d) (%d/%d)",
ipkt->duration,
duration,
video_in_stream->time_base.num,
video_in_stream->time_base.den,
video_out_stream->time_base.num,
video_out_stream->time_base.den
);
} else {
duration = av_rescale_q(
ipkt->pts - video_last_pts,
video_in_stream->time_base,
video_out_stream->time_base);
Debug(1, "duration calc: pts(%" PRId64 ") - last_pts(%" PRId64 ") = (%" PRId64 ") => (%" PRId64 ")",
ipkt->pts,
video_last_pts,
ipkt->pts - video_last_pts,
duration
);
if ( duration <= 0 ) {
// Why are we setting the duration to 1?
Warning("Duration is 0, setting to 1");
duration = ipkt->duration ? ipkt->duration : av_rescale_q(1,video_in_stream->time_base, video_out_stream->time_base);
}
}
opkt.duration = duration;
opkt.flags = ipkt->flags;
opkt.data = ipkt->data;
opkt.size = ipkt->size;
opkt.duration = ipkt->duration;
// Scale the PTS of the outgoing packet to be the correct time base
if ( ipkt->pts != AV_NOPTS_VALUE ) {
if ( (!video_first_pts) && (ipkt->pts >= 0) ) {
// This is the first packet.
opkt.pts = 0;
Debug(2, "Starting video first_pts will become %" PRId64, ipkt->pts);
video_first_pts = ipkt->pts;
} else {
opkt.pts = av_rescale_q(
ipkt->pts - video_first_pts,
video_in_stream->time_base,
video_out_stream->time_base
);
}
Debug(3, "opkt.pts = %" PRId64 " from ipkt->pts(%" PRId64 ") - first_pts(%" PRId64 ")",
opkt.pts, ipkt->pts, video_first_pts);
video_last_pts = ipkt->pts;
} else {
Debug(3, "opkt.pts = undef");
opkt.pts = AV_NOPTS_VALUE;
opkt.pts = ipkt->pts - video_first_pts;
}
// Just because the in stream wraps, doesn't mean the out needs to.
// Really, if we are limiting ourselves to 10min segments I can't imagine every wrapping in the out.
// So need to handle in wrap, without causing out wrap.
// The cameras that Icon has seem to do EOF instead of wrapping
if ( ipkt->dts != AV_NOPTS_VALUE ) {
if ( !video_first_dts ) {
// && ( ipkt->dts >= 0 ) ) {
// This is the first packet.
opkt.dts = 0;
Debug(2, "Starting video first_dts will become (%" PRId64 ")", ipkt->dts);
Debug(2, "Starting video first_dts will become %" PRId64, ipkt->dts);
video_first_dts = ipkt->dts;
} else {
opkt.dts = av_rescale_q(
ipkt->dts - video_first_dts,
video_in_stream->time_base,
video_out_stream->time_base
);
Debug(3, "opkt.dts = %" PRId64 " from ipkt->dts(%" PRId64 ") - first_pts(%" PRId64 ")",
opkt.dts, ipkt->dts, video_first_dts);
}
if ( (opkt.pts != AV_NOPTS_VALUE) && (opkt.dts > opkt.pts) ) {
Debug(1,
"opkt.dts(%" PRId64 ") must be <= opkt.pts(%" PRId64 "). Decompression must happen "
"before presentation.",
opkt.dts, opkt.pts);
opkt.dts = opkt.pts;
}
} else {
Debug(3, "opkt.dts = undef");
opkt.dts = video_out_stream->cur_dts;
opkt.dts = ipkt->dts - video_first_dts;
}
av_packet_rescale_ts(&opkt, video_in_stream->time_base, video_out_stream->time_base);
if ( opkt.dts < video_out_stream->cur_dts ) {
Debug(1, "Fixing non-monotonic dts/pts dts %" PRId64 " pts %" PRId64 " stream %" PRId64,
opkt.dts, opkt.pts, video_out_stream->cur_dts);
opkt.dts = video_out_stream->cur_dts;
if ( opkt.dts > opkt.pts ) {
opkt.pts = opkt.dts;
}
}
opkt.flags = ipkt->flags;
opkt.pos = -1;
opkt.data = ipkt->data;
opkt.size = ipkt->size;
dumpPacket(video_out_stream, &opkt, "after pts adjustment");
write_packet(&opkt, video_out_stream);
zm_av_packet_unref(&opkt);
@@ -1020,7 +976,7 @@ int VideoStore::writeAudioFramePacket(AVPacket *ipkt) {
dumpPacket(audio_in_stream, ipkt, "after pts adjustment");
if ( audio_out_codec ) {
if ( ( ret = zm_receive_frame(audio_in_ctx, in_frame, *ipkt) ) < 0 ) {
if ( ( ret = zm_send_packet_receive_frame(audio_in_ctx, in_frame, *ipkt) ) <= 0 ) {
Debug(3, "Not ready to receive frame");
return 0;
}
@@ -1034,7 +990,7 @@ int VideoStore::writeAudioFramePacket(AVPacket *ipkt) {
zm_dump_frame(out_frame, "Out frame after resample");
av_init_packet(&opkt);
if ( !zm_send_frame(audio_out_ctx, out_frame, opkt) ) {
if ( zm_send_frame_receive_packet(audio_out_ctx, out_frame, opkt) <= 0 ) {
return 0;
}
@@ -1048,9 +1004,9 @@ int VideoStore::writeAudioFramePacket(AVPacket *ipkt) {
#if LIBAVCODEC_VERSION_CHECK(57, 64, 0, 64, 0)
// While the encoder still has packets for us
while ( !avcodec_receive_packet(audio_out_ctx, &opkt) ) {
while ( ! ( avcodec_receive_packet(audio_out_ctx, &opkt) < 0 ) ) {
av_packet_rescale_ts(&opkt, audio_out_ctx->time_base, audio_out_stream->time_base);
dumpPacket(audio_out_stream, &opkt, "raw opkt");
dumpPacket(audio_out_stream, &opkt, "secondary opkt");
write_packet(&opkt, audio_out_stream);
}
#endif
@@ -1166,12 +1122,15 @@ int VideoStore::resample_audio() {
av_make_error_string(ret).c_str());
return 0;
}
zm_dump_frame(out_frame, "Out frame after resample");
out_frame->pkt_duration = in_frame->pkt_duration; // resampling doesn't alter duration
zm_dump_frame(out_frame, "Out frame after resample delay");
Debug(3,"sws_get_delay %d",
swr_get_delay(resample_ctx, audio_out_ctx->sample_rate));
//out_frame->pkt_duration = in_frame->pkt_duration; // resampling doesn't alter duration
ret = av_audio_fifo_realloc(fifo, av_audio_fifo_size(fifo) + out_frame->nb_samples);
if ( ret < 0 ) {
Error("Could not reallocate FIFO");
Error("Could not reallocate FIFO to %d samples",
av_audio_fifo_size(fifo) + out_frame->nb_samples);
return 0;
}
/** Store the new samples in the FIFO buffer. */
@@ -1198,19 +1157,10 @@ int VideoStore::resample_audio() {
}
out_frame->nb_samples = frame_size;
zm_dump_frame(out_frame, "Out frame after fifo read");
// resampling changes the duration because the timebase is 1/samples
// I think we should be dealing in codec timebases not stream
#if 0
if ( in_frame->pts != AV_NOPTS_VALUE ) {
out_frame->pts = av_rescale_q(
in_frame->pts,
audio_in_ctx->time_base,
audio_out_ctx->time_base);
}
#else
out_frame->pts = audio_next_pts;
audio_next_pts += out_frame->nb_samples;
#endif
out_frame->pts = audio_next_pts;
audio_next_pts += out_frame->nb_samples;
zm_dump_frame(out_frame, "Out frame after timestamp conversion");
#else
#if defined(HAVE_LIBAVRESAMPLE)