From 5ddd20ed1a8554d476d4c7fbcd1fbe394f3959d3 Mon Sep 17 00:00:00 2001
From: Isaac Connor <isaac@zoneminder.com>
Date: Fri, 13 Sep 2019 14:34:53 -0400
Subject: [PATCH] Refactor code. Handle resample buffering during encoding
 instead of when closing file

---
 src/zm_ffmpeg.cpp     |  87 ++++++++++++++++++++++
 src/zm_ffmpeg.h       |  26 +++++++
 src/zm_videostore.cpp | 168 +++++++-----------------------------------
 src/zm_videostore.h   |   4 +-
 4 files changed, 142 insertions(+), 143 deletions(-)

diff --git a/src/zm_ffmpeg.cpp b/src/zm_ffmpeg.cpp
index 5616dcd91..55fc52f73 100644
--- a/src/zm_ffmpeg.cpp
+++ b/src/zm_ffmpeg.cpp
@@ -640,3 +640,90 @@ void zm_packet_copy_rescale_ts(const AVPacket *ipkt, AVPacket *opkt, const AVRat
   opkt->duration = ipkt->duration;
   av_packet_rescale_ts(opkt, src_tb, dst_tb);
 }
+
+#if defined(HAVE_LIBSWRESAMPLE) || defined(HAVE_LIBAVRESAMPLE)
+int zm_resample_audio(  // Resample in_frame into out_frame; returns 1 when out_frame was filled, 0 otherwise.
+#if defined(HAVE_LIBSWRESAMPLE)
+    SwrContext *resample_ctx,
+#else
+#if defined(HAVE_LIBAVRESAMPLE)  // libavresample fallback; only reachable when libswresample is absent
+    AVAudioResampleContext *resample_ctx,
+#endif
+#endif
+    AVFrame *in_frame,  // decoded input; may be NULL to drain samples still buffered in the resampler
+    AVFrame *out_frame  // receives the resampled samples
+    ) {
+#if defined(HAVE_LIBSWRESAMPLE)
+  // Resample in_frame into out_frame. Note: pts does not survive resampling or converting.
+  // in_frame is NULL on a drain request, so it must not be dereferenced unconditionally.
+  Debug(2, "Converting %d to %d samples using swresample",
+      in_frame ? in_frame->nb_samples : 0, out_frame->nb_samples);
+  int ret = swr_convert_frame(resample_ctx, out_frame, in_frame);
+  if ( ret < 0 ) {
+    Error("Could not resample frame (error '%s')",
+        av_make_error_string(ret).c_str());
+    return 0;
+  }
+  Debug(3, "swr_get_delay %d",  // swr_get_delay() returns int64_t; cast so the argument matches %d
+      static_cast<int>(swr_get_delay(resample_ctx, out_frame->sample_rate)));
+#else
+#if defined(HAVE_LIBAVRESAMPLE)
+  int ret = avresample_convert(resample_ctx, NULL, 0, 0, in_frame ? in_frame->data : NULL,
+                            0, in_frame ? in_frame->nb_samples : 0);
+  if ( ret < 0 ) {
+    Error("Could not resample frame (error '%s')",
+        av_make_error_string(ret).c_str());
+    return 0;
+  }
+  int samples_available = avresample_available(resample_ctx);
+  if ( samples_available < out_frame->nb_samples ) {
+    Debug(1, "Not enough samples yet (%d)", samples_available);
+    return 0;
+  }
+
+  // Read out_frame->nb_samples samples of audio data from the resample fifo
+  if ( avresample_read(resample_ctx, out_frame->data, out_frame->nb_samples) !=
+      out_frame->nb_samples) {
+    Warning("Error reading resampled audio.");
+    return 0;
+  }
+#endif
+#endif
+  zm_dump_frame(out_frame, "Out frame after resample delay");
+  return 1;
+}
+#endif
+
+int zm_add_samples_to_fifo(AVAudioFifo *fifo, AVFrame *frame) {  // returns 1 on success, 0 on failure
+  // Grow the FIFO so it can hold what it already contains plus this frame's samples.
+  if ( av_audio_fifo_realloc(fifo, av_audio_fifo_size(fifo) + frame->nb_samples) < 0 ) {
+    Error("Could not reallocate FIFO to %d samples",
+        av_audio_fifo_size(fifo) + frame->nb_samples);
+    return 0;
+  }
+  // Append the frame's samples; anything less than the full count is treated as a failure.
+  const int written = av_audio_fifo_write(fifo, (void **)frame->data, frame->nb_samples);
+  if ( written < frame->nb_samples ) {
+    Error("Could not write data to FIFO. %d written, expecting %d. Reason %s",
+        written, frame->nb_samples, av_make_error_string(written).c_str());
+    return 0;
+  }
+  return 1;
+}
+
+int zm_get_samples_from_fifo(AVAudioFifo *fifo, AVFrame *frame) {  // returns 1 if frame was filled, else 0
+  // The caller asks for frame->nb_samples samples; bail out until the FIFO has buffered that many.
+  const int wanted = frame->nb_samples;
+  const int buffered = av_audio_fifo_size(fifo);
+  if ( wanted > buffered ) {
+    Debug(1, "Not enough samples in fifo for AAC codec frame_size %d > fifo size %d",
+         wanted, buffered);
+    return 0;
+  }
+  if ( av_audio_fifo_read(fifo, (void **)frame->data, wanted) < wanted ) {
+    Error("Could not read data from FIFO");
+    return 0;
+  }
+  zm_dump_frame(frame, "Out frame after fifo read");
+  return 1;
+}
diff --git a/src/zm_ffmpeg.h b/src/zm_ffmpeg.h
index 0aa6c6d68..ff2da509c 100644
--- a/src/zm_ffmpeg.h
+++ b/src/zm_ffmpeg.h
@@ -24,6 +24,14 @@
 
 extern "C" {
 
+#ifdef HAVE_LIBSWRESAMPLE
+  #include "libswresample/swresample.h"
+#else
+  #ifdef HAVE_LIBAVRESAMPLE
+    #include "libavresample/avresample.h"
+  #endif
+#endif
+
 // AVUTIL
 #if HAVE_LIBAVUTIL_AVUTIL_H
 #include "libavutil/avassert.h"
@@ -31,6 +39,7 @@ extern "C" {
 #include <libavutil/base64.h>
 #include <libavutil/mathematics.h>
 #include <libavutil/avstring.h>
+#include "libavutil/audio_fifo.h"
 
 /* LIBAVUTIL_VERSION_CHECK checks for the right version of libav and FFmpeg
  * The original source is vlc (in modules/codec/avcodec/avcommon_compat.h)
@@ -360,4 +369,21 @@ int zm_send_frame_receive_packet(AVCodecContext *context, AVFrame *frame, AVPack
 void dumpPacket(AVStream *, AVPacket *,const char *text="");
 void dumpPacket(AVPacket *,const char *text="");
 void zm_packet_copy_rescale_ts(const AVPacket *ipkt, AVPacket *opkt, const AVRational src_tb, const AVRational dst_tb);
+
+#if defined(HAVE_LIBSWRESAMPLE) || defined(HAVE_LIBAVRESAMPLE)
+int zm_resample_audio(  // Resample in_frame into out_frame with whichever resampling library is available.
+#if defined(HAVE_LIBSWRESAMPLE)
+    SwrContext *resample_ctx,
+#else
+#if defined(HAVE_LIBAVRESAMPLE)  // libavresample fallback; only reachable when libswresample is absent
+    AVAudioResampleContext *resample_ctx,
+#endif
+#endif
+    AVFrame *in_frame,
+    AVFrame *out_frame
+    );  // returns 1 when out_frame was filled, 0 otherwise
+#endif
+int zm_add_samples_to_fifo(AVAudioFifo *fifo, AVFrame *frame);    // append frame's samples to fifo; 1 on success, 0 on failure
+int zm_get_samples_from_fifo(AVAudioFifo *fifo, AVFrame *frame);  // read frame->nb_samples samples from fifo into frame; 1 if filled, 0 otherwise
+
 #endif // ZM_FFMPEG_H
diff --git a/src/zm_videostore.cpp b/src/zm_videostore.cpp
index 9653bf9cf..ec1459fae 100644
--- a/src/zm_videostore.cpp
+++ b/src/zm_videostore.cpp
@@ -933,41 +933,11 @@ int VideoStore::writeAudioFramePacket(AVPacket *ipkt) {
   dumpPacket(audio_in_stream, ipkt, "input packet");
 
   if ( !audio_first_pts ) {
-#if 0
-    // Since audio starts after the start of the video, need to set this here.
-    audio_first_pts = av_rescale_q(
-        video_first_pts,
-        video_in_stream->time_base,
-        audio_in_stream->time_base
-        );
-    Debug(2, "Starting audio first_pts will become %" PRId64, audio_first_pts);
-    audio_next_pts = ipkt->pts - audio_first_pts;
-    if ( audio_next_pts < 0 ) {
-      audio_first_pts -= audio_next_pts;
-      audio_next_pts = 0;
-    }
-#else
     audio_first_pts = ipkt->pts;
     audio_next_pts = audio_out_ctx->frame_size;
-#endif
   }
   if ( !audio_first_dts ) {
-#if 0
-    // Since audio starts after the start of the video, need to set this here.
-    audio_first_dts = av_rescale_q(
-        video_first_dts,
-        video_in_stream->time_base,
-        audio_in_stream->time_base
-        );
-    audio_next_dts = ipkt->dts - audio_first_dts;
-    if ( audio_next_dts < 0 ) {
-      audio_first_dts -= audio_next_dts;
-      audio_next_dts = 0;
-    }
-    Debug(2, "Starting audio first_dts will become %" PRId64, audio_first_dts);
-#else
     audio_first_dts = ipkt->dts;
-#endif
   }
 
   // Need to adjust pts before feeding to decoder.... should really copy the pkt instead of modifying it
@@ -976,41 +946,45 @@ int VideoStore::writeAudioFramePacket(AVPacket *ipkt) {
   dumpPacket(audio_in_stream, ipkt, "after pts adjustment");
 
   if ( audio_out_codec ) {
+    // I wonder if we can get multiple frames per packet? Probably
     if ( ( ret = zm_send_packet_receive_frame(audio_in_ctx, in_frame, *ipkt) ) <= 0 ) {
       Debug(3, "Not ready to receive frame");
       return 0;
     }
-
     zm_dump_frame(in_frame, "In frame from decode");
-    if ( !resample_audio() ) {
-      //av_frame_unref(in_frame);
-      return 0;
-    }
 
-    zm_dump_frame(out_frame, "Out frame after resample");
+    AVFrame *input_frame = in_frame;
 
-    av_init_packet(&opkt);
-    if ( zm_send_frame_receive_packet(audio_out_ctx, out_frame, opkt) <= 0 ) {
-      return 0;
-    }
+    while ( zm_resample_audio(resample_ctx, input_frame, out_frame) ) {
+      //out_frame->pkt_duration = in_frame->pkt_duration; // resampling doesn't alter duration
+      if ( zm_add_samples_to_fifo(fifo, out_frame) <= 0 )
+        break;
 
-    // Scale the PTS of the outgoing packet to be the correct time base
-    av_packet_rescale_ts(&opkt,
-            audio_out_ctx->time_base,
-            audio_out_stream->time_base);
+      if ( zm_get_samples_from_fifo(fifo, out_frame) <= 0 )
+        break;
 
-    write_packet(&opkt, audio_out_stream);
-    zm_av_packet_unref(&opkt);
+      out_frame->pts = audio_next_pts;
+      audio_next_pts += out_frame->nb_samples;
+
+      zm_dump_frame(out_frame, "Out frame after resample");
+
+      av_init_packet(&opkt);
+      if ( zm_send_frame_receive_packet(audio_out_ctx, out_frame, opkt) <= 0 )
+        break;
+
+      // Scale the PTS of the outgoing packet to be the correct time base
+      av_packet_rescale_ts(&opkt,
+          audio_out_ctx->time_base,
+          audio_out_stream->time_base);
 
-#if LIBAVCODEC_VERSION_CHECK(57, 64, 0, 64, 0)
-    // While the encoder still has packets for us
-    while ( ! ( avcodec_receive_packet(audio_out_ctx, &opkt) < 0 ) ) {
-      av_packet_rescale_ts(&opkt, audio_out_ctx->time_base, audio_out_stream->time_base);
-      dumpPacket(audio_out_stream, &opkt, "secondary opkt");
       write_packet(&opkt, audio_out_stream);
-    }
-#endif
-    zm_av_packet_unref(&opkt);
+      zm_av_packet_unref(&opkt);
+
+      if ( swr_get_delay(resample_ctx, out_frame->sample_rate) < out_frame->nb_samples)
+        break;
+      // This will send a null frame, emptying out the resample buffer
+      input_frame = NULL;
+    } // end while there is data in the resampler
 
   } else {
     Debug(2,"copying");
@@ -1108,89 +1082,3 @@ int VideoStore::write_packet(AVPacket *pkt, AVStream *stream) {
   }
   return ret;
 }  // end int VideoStore::write_packet(AVPacket *pkt, AVStream *stream)
-
-int VideoStore::resample_audio() {
-  // Resample the in_frame into the audioSampleBuffer until we process the whole
-  // decoded data. Note: pts does not survive resampling or converting
-#if defined(HAVE_LIBSWRESAMPLE) || defined(HAVE_LIBAVRESAMPLE)
-#if defined(HAVE_LIBSWRESAMPLE)
-  Debug(2, "Converting %d to %d samples using swresample",
-      in_frame->nb_samples, out_frame->nb_samples);
-  int ret = swr_convert_frame(resample_ctx, out_frame, in_frame);
-  if ( ret < 0 ) {
-    Error("Could not resample frame (error '%s')",
-        av_make_error_string(ret).c_str());
-    return 0;
-  }
-  zm_dump_frame(out_frame, "Out frame after resample delay");
-  Debug(3,"sws_get_delay %d",
-      swr_get_delay(resample_ctx, audio_out_ctx->sample_rate));
-  //out_frame->pkt_duration = in_frame->pkt_duration; // resampling doesn't alter duration
-
-  ret = av_audio_fifo_realloc(fifo, av_audio_fifo_size(fifo) + out_frame->nb_samples);
-  if ( ret < 0 ) {
-    Error("Could not reallocate FIFO to %d samples",
-        av_audio_fifo_size(fifo) + out_frame->nb_samples);
-    return 0;
-  }
-  /** Store the new samples in the FIFO buffer. */
-  ret = av_audio_fifo_write(fifo, (void **)out_frame->data, out_frame->nb_samples);
-  if ( ret < out_frame->nb_samples ) {
-    Error("Could not write data to FIFO. %d written, expecting %d. Reason %s",
-        ret, out_frame->nb_samples, av_make_error_string(ret).c_str());
-    return 0;
-  }
-
-  // Reset frame_size to output_frame_size
-  int frame_size = audio_out_ctx->frame_size;
-
-  // AAC requires 1024 samples per encode.  Our input tends to be something else, so need to buffer them.
-  if ( frame_size > av_audio_fifo_size(fifo) ) {
-    Debug(1, "Not enough samples in fifo for AAC codec frame_size %d > fifo size %d",
-         frame_size, av_audio_fifo_size(fifo));
-    return 0;
-  }
-
-  if ( av_audio_fifo_read(fifo, (void **)out_frame->data, frame_size) < frame_size ) {
-    Error("Could not read data from FIFO");
-    return 0;
-  }
-  out_frame->nb_samples = frame_size;
-  zm_dump_frame(out_frame, "Out frame after fifo read");
-
-  out_frame->pts = audio_next_pts;
-  audio_next_pts += out_frame->nb_samples;
-
-  zm_dump_frame(out_frame, "Out frame after timestamp conversion");
-#else
-#if defined(HAVE_LIBAVRESAMPLE)
-  ret = avresample_convert(resample_ctx, NULL, 0, 0, in_frame->data,
-                            0, in_frame->nb_samples);
-  if ( ret < 0 ) {
-    Error("Could not resample frame (error '%s')",
-        av_make_error_string(ret).c_str());
-    return 0;
-  }
-
-  int frame_size = audio_out_ctx->frame_size;
-
-  int samples_available = avresample_available(resample_ctx);
-  if ( samples_available < frame_size ) {
-    Debug(1, "Not enough samples yet (%d)", samples_available);
-    return 0;
-  }
-
-  // Read a frame audio data from the resample fifo
-  if ( avresample_read(resample_ctx, out_frame->data, frame_size) !=
-      frame_size) {
-    Warning("Error reading resampled audio.");
-    return 0;
-  }
-#endif
-#endif
-#else
-    Error("Have audio codec but no resampler?!");
-    return 0;
-#endif
-  return 1;
-}  // end int VideoStore::resample_audio
diff --git a/src/zm_videostore.h b/src/zm_videostore.h
index 01dbc4d78..cd704403d 100644
--- a/src/zm_videostore.h
+++ b/src/zm_videostore.h
@@ -5,12 +5,12 @@
 extern "C"  {
 #ifdef HAVE_LIBSWRESAMPLE
   #include "libswresample/swresample.h"
-  #include "libavutil/audio_fifo.h"
 #else
   #ifdef HAVE_LIBAVRESAMPLE
     #include "libavresample/avresample.h"
   #endif
 #endif
+#include "libavutil/audio_fifo.h"
 }
 
 #if HAVE_LIBAVCODEC
@@ -76,8 +76,6 @@ private:
   int64_t audio_next_dts;
 
   bool setup_resampler();
-  int resample_audio();
-
   int write_packet(AVPacket *pkt, AVStream *stream);
 
 public: