167 lines
6.3 KiB
Python
167 lines
6.3 KiB
Python
"""Provides the worker thread needed for processing streams."""
|
|
from fractions import Fraction
|
|
import io
|
|
import logging
|
|
|
|
import av
|
|
|
|
from .const import AUDIO_SAMPLE_RATE
|
|
from .core import Segment, StreamBuffer
|
|
|
|
_LOGGER = logging.getLogger(__name__)
|
|
|
|
|
|
def generate_audio_frame():
|
|
"""Generate a blank audio frame."""
|
|
|
|
audio_frame = av.AudioFrame(format="dbl", layout="mono", samples=1024)
|
|
# audio_bytes = b''.join(b'\x00\x00\x00\x00\x00\x00\x00\x00'
|
|
# for i in range(0, 1024))
|
|
audio_bytes = b"\x00\x00\x00\x00\x00\x00\x00\x00" * 1024
|
|
audio_frame.planes[0].update(audio_bytes)
|
|
audio_frame.sample_rate = AUDIO_SAMPLE_RATE
|
|
audio_frame.time_base = Fraction(1, AUDIO_SAMPLE_RATE)
|
|
return audio_frame
|
|
|
|
|
|
def create_stream_buffer(stream_output, video_stream, audio_frame):
|
|
"""Create a new StreamBuffer."""
|
|
|
|
a_packet = None
|
|
segment = io.BytesIO()
|
|
output = av.open(segment, mode="w", format=stream_output.format)
|
|
vstream = output.add_stream(template=video_stream)
|
|
# Check if audio is requested
|
|
astream = None
|
|
if stream_output.audio_codec:
|
|
astream = output.add_stream(stream_output.audio_codec, AUDIO_SAMPLE_RATE)
|
|
# Need to do it multiple times for some reason
|
|
while not a_packet:
|
|
a_packets = astream.encode(audio_frame)
|
|
if a_packets:
|
|
a_packet = a_packets[0]
|
|
return (a_packet, StreamBuffer(segment, output, vstream, astream))
|
|
|
|
|
|
def stream_worker(hass, stream, quit_event):
|
|
"""Handle consuming streams."""
|
|
|
|
container = av.open(stream.source, options=stream.options)
|
|
try:
|
|
video_stream = container.streams.video[0]
|
|
except (KeyError, IndexError):
|
|
_LOGGER.error("Stream has no video")
|
|
return
|
|
|
|
audio_frame = generate_audio_frame()
|
|
|
|
first_packet = True
|
|
# Holds the buffers for each stream provider
|
|
outputs = {}
|
|
# Keep track of the number of segments we've processed
|
|
sequence = 1
|
|
# Holds the generated silence that needs to be muxed into the output
|
|
audio_packets = {}
|
|
# The presentation timestamp of the first video packet we receive
|
|
first_pts = 0
|
|
# The decoder timestamp of the latest packet we processed
|
|
last_dts = None
|
|
|
|
while not quit_event.is_set():
|
|
try:
|
|
packet = next(container.demux(video_stream))
|
|
if packet.dts is None:
|
|
if first_packet:
|
|
continue
|
|
# If we get a "flushing" packet, the stream is done
|
|
raise StopIteration("No dts in packet")
|
|
except (av.AVError, StopIteration) as ex:
|
|
# End of stream, clear listeners and stop thread
|
|
for fmt, _ in outputs.items():
|
|
hass.loop.call_soon_threadsafe(stream.outputs[fmt].put, None)
|
|
_LOGGER.error("Error demuxing stream: %s", str(ex))
|
|
break
|
|
|
|
# Skip non monotonically increasing dts in feed
|
|
if not first_packet and last_dts >= packet.dts:
|
|
continue
|
|
last_dts = packet.dts
|
|
|
|
# Reset timestamps from a 0 time base for this stream
|
|
packet.dts -= first_pts
|
|
packet.pts -= first_pts
|
|
|
|
# Reset segment on every keyframe
|
|
if packet.is_keyframe:
|
|
# Calculate the segment duration by multiplying the presentation
|
|
# timestamp by the time base, which gets us total seconds.
|
|
# By then dividing by the seqence, we can calculate how long
|
|
# each segment is, assuming the stream starts from 0.
|
|
segment_duration = (packet.pts * packet.time_base) / sequence
|
|
# Save segment to outputs
|
|
for fmt, buffer in outputs.items():
|
|
buffer.output.close()
|
|
del audio_packets[buffer.astream]
|
|
if stream.outputs.get(fmt):
|
|
hass.loop.call_soon_threadsafe(
|
|
stream.outputs[fmt].put,
|
|
Segment(sequence, buffer.segment, segment_duration),
|
|
)
|
|
|
|
# Clear outputs and increment sequence
|
|
outputs = {}
|
|
if not first_packet:
|
|
sequence += 1
|
|
|
|
# Initialize outputs
|
|
for stream_output in stream.outputs.values():
|
|
if video_stream.name != stream_output.video_codec:
|
|
continue
|
|
|
|
a_packet, buffer = create_stream_buffer(
|
|
stream_output, video_stream, audio_frame
|
|
)
|
|
audio_packets[buffer.astream] = a_packet
|
|
outputs[stream_output.name] = buffer
|
|
|
|
# First video packet tends to have a weird dts/pts
|
|
if first_packet:
|
|
# If we are attaching to a live stream that does not reset
|
|
# timestamps for us, we need to do it ourselves by recording
|
|
# the first presentation timestamp and subtracting it from
|
|
# subsequent packets we receive.
|
|
if (packet.pts * packet.time_base) > 1:
|
|
first_pts = packet.pts
|
|
packet.dts = 0
|
|
packet.pts = 0
|
|
first_packet = False
|
|
|
|
# Store packets on each output
|
|
for buffer in outputs.values():
|
|
# Check if the format requires audio
|
|
if audio_packets.get(buffer.astream):
|
|
a_packet = audio_packets[buffer.astream]
|
|
a_time_base = a_packet.time_base
|
|
|
|
# Determine video start timestamp and duration
|
|
video_start = packet.pts * packet.time_base
|
|
video_duration = packet.duration * packet.time_base
|
|
|
|
if packet.is_keyframe:
|
|
# Set first audio packet in sequence to equal video pts
|
|
a_packet.pts = int(video_start / a_time_base)
|
|
a_packet.dts = int(video_start / a_time_base)
|
|
|
|
# Determine target end timestamp for audio
|
|
target_pts = int((video_start + video_duration) / a_time_base)
|
|
while a_packet.pts < target_pts:
|
|
# Mux audio packet and adjust points until target hit
|
|
buffer.output.mux(a_packet)
|
|
a_packet.pts += a_packet.duration
|
|
a_packet.dts += a_packet.duration
|
|
audio_packets[buffer.astream] = a_packet
|
|
|
|
# Assign the video packet to the new stream & mux
|
|
packet.stream = buffer.vstream
|
|
buffer.output.mux(packet)
|