#!/usr/bin/env python3
import os
import sys
from lxml import etree
from base64 import b64decode
def extract_slides(root, output_dir="slides"):
    """Write every embedded JPEG image of the document to its own file.

    Each ``DefineBitsJPEG2`` element found anywhere below *root* must carry
    its base64-encoded image in a nested ``data/data`` element. The images
    are saved in document order as ``slide_01.jpg``, ``slide_02.jpg``, ...
    inside *output_dir*, and each file name is printed as it is written.

    Args:
        root: Parsed XML root element (needs ``findall`` and ``find``).
        output_dir: Directory receiving the images; created when missing.
    """
    # makedirs(exist_ok=True) replaces the isdir()-then-mkdir() pair, which
    # could fail if the directory appeared between the check and the create.
    os.makedirs(output_dir, exist_ok=True)
    for number, node in enumerate(root.findall(".//DefineBitsJPEG2"), start=1):
        encoded = node.find("data").find("data").text
        filename = f"{output_dir}/slide_{number:0>2}.jpg"
        print(f"Saving: {filename}")
        with open(filename, "wb") as f:
            f.write(b64decode(encoded))
def _extract_audio_segments(root):
    """Yield the audio of each run of consecutive SoundStreamBlocks.

    The siblings of the first ``SoundStreamBlock`` below *root* are scanned
    in document order. ``ShowFrame`` elements are skipped, so blocks that are
    separated only by frame markers belong to the same run; any other
    element ends the current run.

    Args:
        root: Parsed document element. The element returned by ``find`` must
            support ``getparent()``, which lxml elements do.

    Yields:
        ``(data, count)`` tuples: the concatenated block payloads as
        ``bytes`` and the number of blocks counted for that run.
    """
    first_block = root.find(".//SoundStreamBlock")
    if first_block is None:
        # The document contains no audio at all: yield nothing.
        return

    data = bytearray()  # mutable buffer avoids re-copying on every append
    count = 0
    for node in first_block.getparent().findall("./*"):
        if node.tag == "ShowFrame":
            continue
        if node.tag == "SoundStreamBlock":
            block = b64decode(node.find("data").text)
            # Drop the first 4 bytes of every block.
            # NOTE(review): presumably the two 16-bit fields (sample count
            # and seek samples) that prefix MP3 stream data in SWF - confirm
            # against the SWF file format specification.
            data += block[4:]
            count += 1
        elif data:
            # A run is only emitted once it has payload; until then the
            # block count keeps accumulating into the next run.
            yield bytes(data), count
            data = bytearray()
            count = 0
    # Flush the final run. Without this, audio that is not followed by some
    # other tag would be silently dropped.
    if data:
        yield bytes(data), count
def _expand_audio_segments(segments, merge_threshold=7):
    """Join short segments into the segment that precedes them.

    A segment with more than *merge_threshold* frames starts a new output
    segment. A segment with *merge_threshold* frames or fewer is appended to
    the one currently being assembled. The very first segment always starts
    the first output segment, whatever its size.

    Args:
        segments: Iterable of ``(data, count)`` tuples, where ``data`` is
            bytes-like and ``count`` is its number of frames.
        merge_threshold: Largest frame count that is still merged into the
            previous segment.

    Yields:
        ``(data, count)`` tuples with the merged bytes and summed counts.
    """
    # None means "no segment seen yet". Testing the buffer's truthiness
    # instead would treat an empty payload as "no buffer" and silently drop
    # the frame count that came with it.
    pending = None
    pending_count = 0
    for data, count in segments:
        if pending is None:
            pending = [data]
            pending_count = count
        elif count > merge_threshold:
            yield b"".join(pending), pending_count
            pending = [data]
            pending_count = count
        else:
            pending.append(data)
            pending_count += count
    if pending is not None:
        yield b"".join(pending), pending_count
def extract_segmented_audio(root, output_dir="audio", *, save_chunks=False):
    """Extract the streamed audio of the document and save it as one file.

    The audio segments found below *root* are merged (short segments are
    joined into their predecessor), a summary line is printed for each
    resulting chunk, and all chunks are written back-to-back to
    ``full.mp3`` inside *output_dir*.

    Args:
        root: Parsed XML root element of the document.
        output_dir: Directory receiving the audio; created when missing.
        save_chunks: When true, additionally write every chunk to its own
            ``<index>_<mm:ss>_<frames>.mp3`` file in *output_dir*.
    """
    # makedirs(exist_ok=True) replaces the race-prone isdir()/mkdir() pair.
    os.makedirs(output_dir, exist_ok=True)
    segments = _expand_audio_segments(_extract_audio_segments(root))

    # Timestamps treat 7 frames as one second.
    # NOTE(review): presumably the movie's frame rate - confirm.
    frames_per_second = 7
    total = 0
    chunks = []  # collected and written once, instead of bytes += per chunk
    for index, (data, count) in enumerate(segments):
        chunks.append(data)
        # Start time of this chunk, derived from the frames seen before it.
        minutes, seconds = divmod(total // frames_per_second, 60)
        timestamp = f"{minutes:0>2}:{seconds:0>2}"
        print(f"Audio chunk @ {timestamp} - {count} frames - {total} total")
        total += count
        if save_chunks:
            chunk_name = f"{output_dir}/{index:0>2}_{timestamp}_{count}.mp3"
            with open(chunk_name, "wb") as f:
                f.write(data)

    filename = f"{output_dir}/full.mp3"
    print(f"Saving: {filename}")
    with open(filename, "wb") as f:
        f.writelines(chunks)
def main():
    """Parse the XML file named on the command line and extract its media.

    Slides and audio are written by ``extract_slides`` and
    ``extract_segmented_audio`` into their default output directories.
    Exits with a usage message when no input file is given.
    """
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} INPUT.xml")
    # Hand the path to the parser rather than a text-mode file object: a
    # text-mode file is decoded with the locale's encoding before the parser
    # sees it, so the XML's own encoding declaration could not be honoured.
    root = etree.parse(sys.argv[1]).getroot()
    extract_slides(root)
    extract_segmented_audio(root)


# Guarded so that importing this module does not trigger an extraction.
if __name__ == "__main__":
    main()