~ihabunek/vampires

ref: 5d4d09d033430022df834577ff0718435f284884 vampires/parse.py -rwxr-xr-x 2.3 KiB
5d4d09d0Ivan Habunek Better image handling 8 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
#!/usr/bin/env python3

import os
import sys
from base64 import b64decode

from lxml import etree


def extract_slides(root):
    """Save every embedded JPEG found under *root* as a numbered slide file.

    Each ``DefineBitsJPEG2`` element is expected to carry its base64-encoded
    image in a nested ``data/data`` child element. The decoded images are
    written to ``slides/slide_NN.jpg`` (relative to the current working
    directory), numbered in document order starting at 00. The ``slides``
    directory is created when it does not exist yet.

    Args:
        root: Root element of the parsed XML document.
    """
    # open() below would raise FileNotFoundError if the directory were missing.
    os.makedirs("slides", exist_ok=True)

    for index, node in enumerate(root.findall(".//DefineBitsJPEG2")):
        data = node.find("data").find("data").text
        filename = f"slides/slide_{index:0>2}.jpg"
        print(f"Saving: {filename}")

        with open(filename, "wb") as f:
            f.write(b64decode(data))


def _extract_audio_segments(root):
    """Extract and yield audio from runs of consecutive SoundStreamBlocks.

    Walks the children of the element that contains the first
    ``SoundStreamBlock`` found under *root*. ``ShowFrame`` children are
    ignored, so blocks separated only by ``ShowFrame`` belong to the same
    run. Any other tag ends the current run.

    Args:
        root: Root element of the parsed XML document (an lxml element, since
            ``getparent()`` is used).

    Yields:
        ``(data, count)`` tuples where ``data`` is the concatenated payload
        (``bytes``) of one run and ``count`` is the number of
        ``SoundStreamBlock`` elements that contributed to it. Nothing is
        yielded when the document contains no ``SoundStreamBlock``.
    """
    first_block = root.find(".//SoundStreamBlock")
    if first_block is None:
        # No audio at all: yield nothing instead of failing on None.getparent().
        return

    # bytearray gives amortised O(1) appends; bytes += would copy every time.
    data = bytearray()
    count = 0

    for node in first_block.getparent().findall("./*"):
        if node.tag == "ShowFrame":
            continue
        if node.tag == "SoundStreamBlock":
            block = b64decode(node.find("data").text)
            # Drop the first 4 bytes of every block (presumably the two 16-bit
            # fields that precede the MP3 frames in a stream block - confirm
            # against the SWF specification).
            data += block[4:]
            count += 1
        elif data:
            yield bytes(data), count
            data = bytearray()
            count = 0

    # Flush the final run: without this, audio that is not followed by some
    # other tag would be silently lost.
    if data:
        yield bytes(data), count


def _expand_audio_segments(root):
    """Merge short audio segments (7 frames or fewer) into their predecessor.

    Consumes the ``(data, count)`` pairs produced by
    ``_extract_audio_segments`` and yields pairs of the same shape. A segment
    longer than 7 frames starts a new output segment; a shorter one is
    appended to the segment currently being accumulated.
    """
    pending = None
    pending_frames = 0

    for chunk, frames in _extract_audio_segments(root):
        if not pending:
            # Nothing accumulated yet: this segment opens the first group.
            pending, pending_frames = chunk, frames
            continue

        if frames <= 7:
            # Too short to stand alone; glue it onto the current group.
            pending += chunk
            pending_frames += frames
            continue

        # A long segment closes the current group and opens the next one.
        yield pending, pending_frames
        pending, pending_frames = chunk, frames

    if pending:
        yield pending, pending_frames


def extract_segmented_audio(root):
    """Print a timeline of the audio chunks and save them joined in one file.

    For every chunk produced by ``_expand_audio_segments`` a line with the
    chunk's start time and frame count is printed. The start time is derived
    from the number of frames seen before the chunk, at 7 frames per second.
    All chunks are then written, in order, to ``audio/full.mp3`` relative to
    the current working directory; the ``audio`` directory is created when it
    does not exist yet.

    Args:
        root: Root element of the parsed XML document.
    """
    total = 0  # frames covered by the chunks processed so far
    chunks = []
    # `index` is only needed by the optional per-chunk dump below.
    for index, (data, count) in enumerate(_expand_audio_segments(root)):
        chunks.append(data)
        # Computed before adding `count`, so this is the chunk's start time.
        minutes, seconds = divmod(total // 7, 60)
        time = f"{minutes:0>2}:{seconds:0>2}"
        total += count
        print(f"Audio chunk @ {time} - {count} frames")

        # # Uncomment to save individual audio chunks
        # filename = f"audio/{index:0>2}_{time}_{count}.mp3"
        # print(filename)
        # with open(filename, "wb") as f:
        #     f.write(data)

    # open() below would raise FileNotFoundError if the directory were missing.
    os.makedirs("audio", exist_ok=True)

    print("Saving: audio/full.mp3")
    with open("audio/full.mp3", "wb") as f:
        # A single join avoids re-copying the whole buffer for every chunk.
        f.write(b"".join(chunks))


def main(argv=None):
    """Parse the XML file named on the command line and extract its media.

    Args:
        argv: Command-line arguments without the program name. Defaults to
            ``sys.argv[1:]``. Only the first argument (the XML file path) is
            used.

    Exits with a usage message when no file path is given.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        sys.exit(f"Usage: {sys.argv[0]} <file.xml>")

    # Binary mode hands the raw bytes to the XML parser so that it can apply
    # the document's own encoding instead of Python's locale-dependent
    # text decoding.
    with open(args[0], "rb") as f:
        root = etree.parse(f).getroot()

    # The tree is fully loaded here, so the file no longer needs to be open.
    extract_slides(root)
    extract_segmented_audio(root)


# Guarded so that importing this module does not trigger an extraction run.
if __name__ == "__main__":
    main()