just more messing around trying to get audio transcoding to work

pull/1185/head
Isaac Connor 2016-09-26 17:04:04 -04:00
parent 06da04c5d7
commit 1bd652ce4b
3 changed files with 94 additions and 54 deletions

src/zm_ffmpeg_camera.cpp

@ -85,6 +85,8 @@ FfmpegCamera::FfmpegCamera( int p_id, const std::string &p_path, const std::stri
FfmpegCamera::~FfmpegCamera()
{
CloseFfmpeg();
if ( capture )
@ -575,15 +577,20 @@ Debug(5, "After av_read_frame (%d)", ret );
Error( "Unable to read packet from stream %d: error %d \"%s\".", packet.stream_index, ret, errbuf );
return( -1 );
}
Debug( 3, "Got packet from stream %d dts (%d) pts(%d) key?(%d)", packet.stream_index, packet.dts, packet.pts, packet.flags & AV_PKT_FLAG_KEY );
//av_packet_ref( &packet, &packet );
int key_frame = packet.flags & AV_PKT_FLAG_KEY;
Debug( 3, "Got packet from stream %d packet pts (%d) dts(%d), key?(%d)",
packet.stream_index, packet.pts, packet.dts,
key_frame
);
//Video recording
if ( recording ) {
// The directory we are recording to is no longer tied to the current event.
// Need to re-init the videostore with the correct directory and start recording again
// for efficiency's sake, we should test for keyframe before we test for directory change...
if ( videoStore && (packet.flags & AV_PKT_FLAG_KEY) && (strcmp(oldDirectory, event_file) != 0 ) ) {
if ( videoStore && key_frame && (strcmp(oldDirectory, event_file) != 0 ) ) {
// don't open new videostore until we're on a key frame..would this require an offset adjustment for the event as a result?...
// if we store our key frame location with the event will that be enough?
Info("Re-starting video storage module");
@ -638,7 +645,7 @@ Debug(5, "After av_read_frame (%d)", ret );
while ( ( queued_packet = packetqueue.popPacket() ) ) {
packet_count += 1;
//Write the packet to our video store
Debug(2, "Writing queued packet stream: %d KEY %d, remaining (%d)", queued_packet->stream_index, queued_packet->flags & AV_PKT_FLAG_KEY, packetqueue.size() );
Debug(2, "Writing queued packet stream: %d KEY %d, remaining (%d)", queued_packet->stream_index, queued_packet->flags & AV_PKT_FLAG_KEY, packetqueue.size() );
if ( queued_packet->stream_index == mVideoStreamId ) {
ret = videoStore->writeVideoFramePacket( queued_packet );
} else if ( queued_packet->stream_index == mAudioStreamId ) {
@ -666,17 +673,21 @@ Debug(5, "After av_read_frame (%d)", ret );
//Buffer video packets, since we are not recording. All audio packets are keyframes, so only if it's a video keyframe
if ( packet.stream_index == mVideoStreamId) {
if ( packet.flags & AV_PKT_FLAG_KEY ) {
if ( key_frame ) {
Debug(3, "Clearing queue");
packetqueue.clearQueue();
}
if ( packet.pts && video_last_pts > packet.pts ) {
Debug(3, "Clearing queue due to out of order pts");
Warning( "Clearing queue due to out of order pts");
packetqueue.clearQueue();
}
}
if ( packet.stream_index != mAudioStreamId || record_audio ) {
if (
( packet.stream_index != mAudioStreamId || record_audio )
&&
( key_frame || packetqueue.size() )
) {
packetqueue.queuePacket( &packet );
}
} // end if recording or not
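// A sketch of the buffering rule above, condensed into one helper. The queue
// class is assumed to be the one used above (zm_packetqueue in ZoneMinder's
// tree); only the methods visible in this diff are relied on:
static void buffer_pre_event_packet( zm_packetqueue &queue, AVPacket &packet,
    int video_stream, int audio_stream, bool record_audio, bool key_frame ) {
  // Restart the buffer at every GOP boundary so the queued pre-event video
  // always begins with an I-frame a player can decode from.
  if ( packet.stream_index == video_stream && key_frame )
    queue.clearQueue();
  // Only queue once a keyframe has been seen (key_frame || non-empty queue),
  // and skip audio packets entirely when audio recording is disabled.
  if ( ( packet.stream_index != audio_stream || record_audio )
       && ( key_frame || queue.size() ) )
    queue.queuePacket( &packet );
}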

src/zm_videostore.cpp

@ -52,7 +52,6 @@ VideoStore::VideoStore(const char *filename_in, const char *format_in,
Info("Opening video storage stream %s format: %s\n", filename, format);
int ret;
static char error_buffer[255];
//Init everything we need, shouldn't have to do this, ffmpeg_camera or something else will call it.
//av_register_all();
@ -116,6 +115,10 @@ VideoStore::VideoStore(const char *filename_in, const char *format_in,
}
#endif
// Just copy them from the input, no reason to choose different
video_output_context->time_base = video_input_context->time_base;
video_output_stream->time_base = video_input_stream->time_base;
Debug(3, "Time bases: VIDEO input stream (%d/%d) input codec: (%d/%d) output stream: (%d/%d) output codec (%d/%d)",
video_input_stream->time_base.num,
video_input_stream->time_base.den,
@ -260,17 +263,8 @@ Debug(2, "Have audio_output_context");
audio_output_context->sample_fmt = AV_SAMPLE_FMT_FLTP;
}
Debug(3, "Audio Time bases input stream (%d/%d) input codec: (%d/%d) output_stream (%d/%d) output codec (%d/%d)",
audio_input_stream->time_base.num,
audio_input_stream->time_base.den,
audio_input_context->time_base.num,
audio_input_context->time_base.den,
audio_output_stream->time_base.num,
audio_output_stream->time_base.den,
audio_output_context->time_base.num,
audio_output_context->time_base.den
);
audio_output_stream->time_base = (AVRational){ 1, audio_output_context->sample_rate };
//audio_output_stream->time_base = audio_input_stream->time_base;
audio_output_context->time_base = (AVRational){ 1, audio_output_context->sample_rate };
Debug(3, "Audio Time bases input stream (%d/%d) input codec: (%d/%d) output_stream (%d/%d) output codec (%d/%d)",
audio_input_stream->time_base.num,
@ -282,9 +276,6 @@ Debug(2, "Have audio_output_context");
audio_output_context->time_base.num,
audio_output_context->time_base.den
);
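// A note on the {1, sample_rate} time base set above: one tick equals one
// audio sample, so a 1024-sample AAC frame at 8000 Hz has duration
// 1024 ticks = 0.128s. A sketch of the conversion, under that assumption:
static int64_t samples_to_stream_ts( int64_t nb_samples, int sample_rate,
    AVRational stream_time_base ) {
  AVRational sample_tb = { 1, sample_rate };
  return av_rescale_q( nb_samples, sample_tb, stream_time_base );
}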
/** Set the sample rate for the container. */
//audio_output_stream->time_base.den = audio_input_context->sample_rate;
//audio_output_stream->time_base.num = 1;
ret = avcodec_open2(audio_output_context, audio_output_codec, &opts );
if ( ret < 0 ) {
@ -409,19 +400,11 @@ Debug(2, "Have audio_output_context");
Debug(3, "Audio is mono");
}
} // end if is AAC
if (oc->oformat->flags & AVFMT_GLOBALHEADER) {
audio_output_context->flags |= CODEC_FLAG_GLOBAL_HEADER;
}
Debug(3, "Audio Time bases input stream time base(%d/%d) input codec tb: (%d/%d) video_output_stream->time-base(%d/%d) output codec tb (%d/%d)",
audio_input_stream->time_base.num,
audio_input_stream->time_base.den,
audio_input_context->time_base.num,
audio_input_context->time_base.den,
audio_output_stream->time_base.num,
audio_output_stream->time_base.den,
audio_output_context->time_base.num,
audio_output_context->time_base.den
);
if (oc->oformat->flags & AVFMT_GLOBALHEADER) {
audio_output_context->flags |= CODEC_FLAG_GLOBAL_HEADER;
}
} else {
Debug(3, "No Audio output stream");
audio_output_stream = NULL;
@ -469,6 +452,40 @@ Debug(2, "Have audio_output_context");
VideoStore::~VideoStore(){
if ( audio_output_codec ) {
Debug(1, "Have audio encoder, need to flush it's output" );
// Do we need to flush the outputs? I have no idea.
AVPacket pkt;
int got_packet;
av_init_packet(&pkt);
pkt.data = NULL;
pkt.size = 0;
int64_t size = 0;
while(1) {
ret = avcodec_encode_audio2( audio_output_context, &pkt, NULL, &got_packet );
if (ret < 0) {
Error("ERror encoding audio while flushing");
break;
}
Debug(1, "Have audio encoder, need to flush it's output" );
size += pkt.size;
if (!got_packet) {
break;
}
Debug(2, "writing flushed packet pts(%d) dts(%d) duration(%d)", pkt.pts, pkt.dts, pkt.duration );
if (pkt.pts != AV_NOPTS_VALUE)
pkt.pts = av_rescale_q(pkt.pts, audio_output_context->time_base, audio_output_stream->time_base);
if (pkt.dts != AV_NOPTS_VALUE)
pkt.dts = av_rescale_q(pkt.dts, audio_output_context->time_base, audio_output_stream->time_base);
if (pkt.duration > 0)
pkt.duration = av_rescale_q(pkt.duration, audio_output_context->time_base, audio_output_stream->time_base);
Debug(2, "writing flushed packet pts(%d) dts(%d) duration(%d)", pkt.pts, pkt.dts, pkt.duration );
pkt.stream_index = audio_output_stream->index;
av_interleaved_write_frame( oc, &pkt );
zm_av_unref_packet( &pkt );
} // while 1
}
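// On the open question in the destructor above ("Do we need to flush?"):
// yes. Encoders with delay (AAC buffers roughly a frame of lookahead) keep
// packets internally, and the FFmpeg 2.x idiom is exactly this loop: feed
// avcodec_encode_audio2() a NULL frame until got_packet comes back 0.
// A condensed sketch of the pattern, assuming oc/contexts as above:
static void drain_audio_encoder( AVFormatContext *oc, AVCodecContext *enc,
    AVStream *st ) {
  AVPacket pkt;
  int got_packet = 0;
  do {
    av_init_packet( &pkt );
    pkt.data = NULL;   // NULL data / 0 size lets the encoder allocate the packet
    pkt.size = 0;
    if ( avcodec_encode_audio2( enc, &pkt, NULL, &got_packet ) < 0 )
      break;           // encoder error: stop draining
    if ( got_packet ) {
      // Encoder timestamps are in the codec time base; the muxer wants the
      // stream time base.
      av_packet_rescale_ts( &pkt, enc->time_base, st->time_base );
      pkt.stream_index = st->index;
      av_interleaved_write_frame( oc, &pkt );
    }
    av_free_packet( &pkt );
  } while ( got_packet );
}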
/* Write the trailer before close */
if ( int rc = av_write_trailer(oc) ) {
Error("Error writing trailer %s", av_err2str( rc ) );
@ -626,7 +643,6 @@ Debug(4, "Not video and RAWPICTURE");
dumpPacket(&opkt);
} else {
int ret;
prevDts = opkt.dts; // Unsure if av_interleaved_write_frame() clobbers opkt.dts when out of order, so storing in advance
ret = av_interleaved_write_frame(oc, &opkt);
@ -654,15 +670,12 @@ int VideoStore::writeAudioFramePacket( AVPacket *ipkt ) {
return -1;*/
//zm_dump_stream_format( oc, ipkt->stream_index, 0, 1 );
int ret;
av_init_packet(&opkt);
Debug(5, "after init packet" );
#if 1
//Scale the PTS of the outgoing packet to be the correct time base
if (ipkt->pts != AV_NOPTS_VALUE) {
if ( ipkt->pts != AV_NOPTS_VALUE ) {
if ( (!audio_start_pts) || ( audio_start_pts > ipkt->pts ) ) {
Debug(1, "Resetting audeo_start_pts from (%d) to (%d)", audio_start_pts, ipkt->pts );
//never gets set, so the first packet can set it.
@ -677,7 +690,7 @@ int VideoStore::writeAudioFramePacket( AVPacket *ipkt ) {
//Scale the DTS of the outgoing packet to be the correct time base
if(ipkt->dts == AV_NOPTS_VALUE) {
if ( (!audio_start_dts) || (audio_start_dts > audio_input_stream->cur_dts ) ) {
Debug(1, "Resetting audeo_start_pts from (%d) to (%d)", audio_start_dts, audio_input_stream->cur_dts );
Debug(1, "Resetting audio_start_pts from (%d) to cur_dts (%d)", audio_start_dts, audio_input_stream->cur_dts );
audio_start_dts = audio_input_stream->cur_dts;
}
opkt.dts = av_rescale_q(audio_input_stream->cur_dts - audio_start_dts, AV_TIME_BASE_Q, audio_output_stream->time_base);
@ -685,7 +698,7 @@ int VideoStore::writeAudioFramePacket( AVPacket *ipkt ) {
opkt.dts, audio_input_stream->cur_dts, audio_start_dts
);
} else {
if ( (!audio_start_dts) || ( audio_start_dts > ipkt->dts ) ) {
if ( ( ! audio_start_dts ) || ( audio_start_dts > ipkt->dts ) ) {
Debug(1, "Resetting audeo_start_dts from (%d) to (%d)", audio_start_dts, ipkt->dts );
audio_start_dts = ipkt->dts;
}
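// A sketch of the normalisation above: the first packet's timestamp becomes
// the origin, and the offset is rescaled between time bases. Worked example,
// assuming a {1,90000} input and {1,8000} output time base:
//   ipkt->pts = 990000, audio_start_pts = 900000
//   av_rescale_q( 90000, {1,90000}, {1,8000} ) = 8000, i.e. 1.0s into the event
static int64_t normalise_ts( int64_t ts, int64_t start_ts,
    AVRational in_tb, AVRational out_tb ) {
  return av_rescale_q( ts - start_ts, in_tb, out_tb );
}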
@ -750,8 +763,6 @@ av_codec_is_encoder( audio_output_context->codec)
av_frame_unref( input_frame );
#else
// convert the packet to the codec timebase from the stream timebase
av_packet_rescale_ts( ipkt, audio_input_stream->time_base, audio_input_context->time_base );
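// Why the rescale above: FFmpeg's own transcoding example feeds decoders
// packet timestamps in the codec context's time base, while demuxed packets
// carry the stream's; the encode path later mirrors the conversion in the
// opposite direction. The pair, as a sketch:
static void rescale_for_decode( AVPacket *pkt, AVStream *in_st,
    AVCodecContext *in_ctx ) {
  av_packet_rescale_ts( pkt, in_st->time_base, in_ctx->time_base );   // stream -> codec
}
static void rescale_for_mux( AVPacket *pkt, AVCodecContext *out_ctx,
    AVStream *out_st ) {
  av_packet_rescale_ts( pkt, out_ctx->time_base, out_st->time_base ); // codec -> stream
}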
/**
* Decode the audio frame stored in the packet.
@ -802,11 +813,6 @@ av_codec_is_encoder( audio_output_context->codec)
return 0;
}
/** Create a new frame to store the audio samples. */
if (!(output_frame = zm_av_frame_alloc())) {
Error("Could not allocate output frame");
return 0;
}
/**
* Set the frame's parameters, especially its size and format.
* av_frame_get_buffer needs this to allocate memory for the
@ -834,15 +840,19 @@ av_codec_is_encoder( audio_output_context->codec)
return 0;
}
/** Set a timestamp based on the sample rate for the container. */
if (output_frame) {
output_frame->pts = av_frame_get_best_effort_timestamp(output_frame);
}
Debug(4, "About to read");
if (av_audio_fifo_read(fifo, (void **)output_frame->data, frame_size) < frame_size) {
Error( "Could not read data from FIFO\n");
return 0;
}
/** Set a timestamp based on the sample rate for the container. */
output_frame->pts = av_rescale_q( opkt.pts, audio_output_context->time_base, audio_output_stream->time_base );
// convert the packet to the codec timebase from the stream timebase
Debug(3, "output_frame->pts(%d) best effort(%d)", output_frame->pts,
av_frame_get_best_effort_timestamp(output_frame)
);
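// A sketch of the FIFO pattern above (FFmpeg's transcode_aac example uses the
// same scheme): decoded frames of arbitrary size are written into an
// AVAudioFifo, and fixed encoder-sized frames are read back out once enough
// samples have accumulated. frame_size would come from the output codec context.
static int read_encoder_frame( AVAudioFifo *fifo, AVFrame *output_frame,
    int frame_size ) {
  if ( av_audio_fifo_size( fifo ) < frame_size )
    return 0;                 // not enough samples buffered for a full frame yet
  if ( av_audio_fifo_read( fifo, (void **)output_frame->data, frame_size )
       < frame_size )
    return AVERROR_EXTERNAL;  // short read: FIFO and bookkeeping out of sync
  return frame_size;
}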
/**
* Encode the audio frame and store it in the temporary packet.
* The output audio stream encoder is used to do this.
@ -859,9 +869,24 @@ av_codec_is_encoder( audio_output_context->codec)
zm_av_unref_packet(&opkt);
return 0;
}
Debug(2, "opkt dts (%d) pts(%d) duration:(%d)", opkt.dts, opkt.pts, opkt.duration );
// Convert tb from codec back to stream
av_packet_rescale_ts(&opkt, audio_output_context->time_base, audio_output_stream->time_base);
//av_packet_rescale_ts(&opkt, audio_output_context->time_base, audio_output_stream->time_base);
if (opkt.pts != AV_NOPTS_VALUE) {
opkt.pts = av_rescale_q( opkt.pts, audio_output_context->time_base, audio_output_stream->time_base);
}
if ( opkt.dts != AV_NOPTS_VALUE)
opkt.dts = av_rescale_q( opkt.dts, audio_output_context->time_base, audio_output_stream->time_base);
if ( opkt.duration > 0)
opkt.duration = av_rescale_q( opkt.duration, audio_output_context->time_base, audio_output_stream->time_base);
Debug(2, "opkt dts (%d) pts(%d) duration:(%d) pos(%d) ", opkt.dts, opkt.pts, opkt.duration, opkt.pos );
//opkt.dts = AV_NOPTS_VALUE;
#endif
} else {

src/zm_videostore.h

@ -32,6 +32,7 @@ private:
AVCodecContext *video_input_context;
AVCodecContext *audio_input_context;
int ret;
// The following are used when encoding the audio stream to AAC
AVCodec *audio_output_codec;
@ -53,6 +54,9 @@ uint8_t *converted_input_samples = NULL;
int64_t audio_start_pts;
int64_t audio_start_dts;
int64_t start_pts;
int64_t start_dts;
int64_t prevDts;
int64_t filter_in_rescale_delta_last;