~heckyel/yt-local

8af98968dd4325d5686bfed109aa4ed18b17edbc — Astound 7 months ago 8f00cbc
Refactor code and reuse INNERTUBE_CLIENTS
2 files changed, 110 insertions(+), 114 deletions(-)

M youtube/util.py
M youtube/watch.py
M youtube/util.py => youtube/util.py +36 -41
@@ 431,34 431,29 @@ class RateLimitedQueue(gevent.queue.Queue):
        gevent.queue.Queue.__init__(self)

    def get(self):
        """Return the next item, sleeping first when the burst allowance is spent.

        The whole operation runs under ``self.lock``, so only one greenlet at
        a time evaluates the counters or dequeues.  Using ``with`` guarantees
        the lock is released even if the sleep or the dequeue raises.

        Rate limiting: while ``surpassed_initial`` is false the allowance is
        ``initial_burst`` items; afterwards it is ``subsequent_bursts``.  Once
        ``count_since_last_wait`` reaches the active allowance, the caller
        sleeps for ``waiting_period`` and the counter starts over.

        Idle reset: if the queue was empty when this call arrived and at least
        ``waiting_period`` elapsed before an item could be returned, the
        counter is cleared and the initial-burst allowance is restored.

        Returns:
            The item taken from the underlying ``gevent.queue.Queue``.
        """
        with self.lock:     # blocks if another greenlet currently has the lock
            # Pick the allowance that applies to the current phase.
            if self.surpassed_initial:
                burst_limit = self.subsequent_bursts
            else:
                burst_limit = self.initial_burst

            if self.count_since_last_wait >= burst_limit:
                self.surpassed_initial = True
                gevent.sleep(self.waiting_period)
                self.count_since_last_wait = 0

            self.count_since_last_wait += 1

            # Remember when the queue was first seen empty so the idle time
            # can be measured once an item finally arrives.
            if not self.currently_empty and self.empty():
                self.currently_empty = True
                self.empty_start = time.monotonic()

            item = gevent.queue.Queue.get(self)     # blocks when nothing left

            if self.currently_empty:
                # Waiting on the empty queue for a full period counts as
                # having served the pause: start again from the initial burst.
                if time.monotonic() - self.empty_start >= self.waiting_period:
                    self.count_since_last_wait = 0
                    self.surpassed_initial = False

                self.currently_empty = False

            return item


def download_thumbnail(save_directory, video_id):


@@ 667,25 662,6 @@ def to_valid_filename(name):

# https://github.com/yt-dlp/yt-dlp/blob/master/yt_dlp/extractor/youtube.py#L72
INNERTUBE_CLIENTS = {
    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'hl': 'en',
                'gl': 'US',
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '6.48.51',
                'osName': 'Android',
                'osVersion': '14',
                'androidSdkVersion': 34,
                'platform': 'MOBILE',
                'userAgent': 'com.google.android.apps.youtube.music/6.48.51 (Linux; U; Android 14; US) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },

    'android': {
        'INNERTUBE_API_KEY': 'AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w',
        'INNERTUBE_CONTEXT': {


@@ 721,6 697,25 @@ INNERTUBE_CLIENTS = {
        'REQUIRE_JS_PLAYER': False
    },

    'android_music': {
        'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI',
        'INNERTUBE_CONTEXT': {
            'client': {
                'hl': 'en',
                'gl': 'US',
                'clientName': 'ANDROID_MUSIC',
                'clientVersion': '6.48.51',
                'osName': 'Android',
                'osVersion': '14',
                'androidSdkVersion': 34,
                'platform': 'MOBILE',
                'userAgent': 'com.google.android.apps.youtube.music/6.48.51 (Linux; U; Android 14; US) gzip'
            }
        },
        'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
        'REQUIRE_JS_PLAYER': False
    },

    # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option)
    # See: https://github.com/zerodytrash/YouTube-Internal-Clients
    'tv_embedded': {

M youtube/watch.py => youtube/watch.py +74 -73
@@ 2,6 2,7 @@ import youtube
from youtube import yt_app
from youtube import util, comments, local_playlist, yt_data_extract
from youtube.util import time_utc_isoformat
from youtube.util import INNERTUBE_CLIENTS
import settings

from flask import request


@@ 369,83 370,83 @@ def fetch_watch_page_info(video_id, playlist_id, index):
    return yt_data_extract.extract_watch_info_from_html(watch_page)

def extract_info(video_id, use_invidious, playlist_id=None, index=None):
    # Build the `info` dict for `video_id`: fetch the watch page and a player
    # response concurrently with gevent, retry with the 'tv_embedded' client
    # when the video is age restricted or player urls are missing, decrypt url
    # signatures, decide whether urls are ready, load hls formats when needed,
    # and, when settings.route_tor == 1 and a HEAD request on the first format
    # url answers 403, set info['tor_bypass_used'] and append '&use_tor=1' to
    # every format url. Returns `info`.
    #
    # NOTE(review): this span reads like a rendered diff whose +/- markers were
    # dropped. A flat 4-space body and an 8-space body nested under
    # `for client in INNERTUBE_CLIENTS:` are interleaved, so the text below is
    # not one runnable function. The code is left untouched; confirm against
    # the repository before relying on either variant.
    # NOTE(review): `use_invidious` is not read anywhere in the visible body.

    # Flat variant: always asks for the 'android' player response.
    tasks = (
        # Get video metadata from here
        gevent.spawn(fetch_watch_page_info, video_id, playlist_id, index),
        gevent.spawn(fetch_player_response, 'android', video_id)
    )
    gevent.joinall(tasks)
    util.check_gevent_exceptions(*tasks)
    info, player_response = tasks[0].value, tasks[1].value

    yt_data_extract.update_with_new_urls(info, player_response)
    # Loop variant: one watch-page/player-response fetch pair per key of
    # INNERTUBE_CLIENTS.
    # NOTE(review): the 8-space `return info` at the end of this span sits
    # inside this loop, so as written only the first client would be tried;
    # no 8-space call to update_with_new_urls follows the fetch in the visible
    # lines either -- confirm both against the real file.
    for client in INNERTUBE_CLIENTS:
        tasks = (
            gevent.spawn(fetch_watch_page_info, video_id, playlist_id, index),
            gevent.spawn(fetch_player_response, client, video_id)  # Use client from INNERTUBE_CLIENTS
        )
        gevent.joinall(tasks)
        util.check_gevent_exceptions(*tasks)
        info, player_response = tasks[0].value, tasks[1].value

    # Age restricted video, retry
    if info['age_restricted'] or info['player_urls_missing']:
        if info['age_restricted']:
            print('Age restricted video, retrying')
        else:
            print('Player urls missing, retrying')
        player_response = fetch_player_response('tv_embedded', video_id)
        yt_data_extract.update_with_new_urls(info, player_response)

    # signature decryption
    decryption_error = decrypt_signatures(info, video_id)
    if decryption_error:
        decryption_error = 'Error decrypting url signatures: ' + decryption_error
        info['playability_error'] = decryption_error

    # check if urls ready (non-live format) in former livestream
    # urls not ready if all of them have no filesize
    if info['was_live']:
        info['urls_ready'] = False
        for fmt in info['formats']:
            if fmt['file_size'] is not None:
                info['urls_ready'] = True
    else:
        info['urls_ready'] = True

    # livestream urls
    # sometimes only the livestream urls work soon after the livestream is over
    if (info['hls_manifest_url']
        and (info['live'] or not info['formats'] or not info['urls_ready'])
    ):
        manifest = util.fetch_url(info['hls_manifest_url'],
            debug_name='hls_manifest.m3u8',
            report_text='Fetched hls manifest'
        ).decode('utf-8')

        info['hls_formats'], err = yt_data_extract.extract_hls_formats(manifest)
        if not err:
            info['playability_error'] = None
        for fmt in info['hls_formats']:
            fmt['video_quality'] = video_quality_string(fmt)
    else:
        info['hls_formats'] = []

    # check for 403. Unnecessary for tor video routing b/c ip address is same
    info['invidious_used'] = False
    info['invidious_reload_button'] = False
    info['tor_bypass_used'] = False
    if (settings.route_tor == 1
            and info['formats'] and info['formats'][0]['url']):
        try:
            response = util.head(info['formats'][0]['url'],
                                 report_text='Checked for URL access')
        except urllib3.exceptions.HTTPError:
            # The access check is best-effort: log the failure and return
            # what has been gathered so far.
            print('Error while checking for URL access:\n')
            traceback.print_exc()
            return info

        if response.status == 403:
            print('Access denied (403) for video urls.')
            print('Routing video through Tor')
            info['tor_bypass_used'] = True
        # NOTE(review): from here the 8-space (loop) variant begins; the flat
        # variant's 403 branch resumes a few lines further down.
        # Age restricted video, retry
        if info['age_restricted'] or info['player_urls_missing']:
            if info['age_restricted']:
                print('Age restricted video, retrying')
            else:
                print('Player urls missing, retrying')
            player_response = fetch_player_response('tv_embedded', video_id)
            yt_data_extract.update_with_new_urls(info, player_response)

        # signature decryption
        decryption_error = decrypt_signatures(info, video_id)
        if decryption_error:
            decryption_error = 'Error decrypting url signatures: ' + decryption_error
            info['playability_error'] = decryption_error

        # check if urls ready (non-live format) in former livestream
        # urls not ready if all of them have no filesize
        if info['was_live']:
            info['urls_ready'] = False
            for fmt in info['formats']:
                # NOTE(review): the next four lines appear to be the tail of
                # the flat variant (403 branch and its final return).
                fmt['url'] += '&use_tor=1'
        elif 300 <= response.status < 400:
            print('Error: exceeded max redirects while checking video URL')
    return info
                if fmt['file_size'] is not None:
                    info['urls_ready'] = True
        else:
            info['urls_ready'] = True

        # livestream urls
        # sometimes only the livestream urls work soon after the livestream is over
        if (info['hls_manifest_url']
            and (info['live'] or not info['formats'] or not info['urls_ready'])
        ):
            manifest = util.fetch_url(info['hls_manifest_url'],
                debug_name='hls_manifest.m3u8',
                report_text='Fetched hls manifest'
            ).decode('utf-8')

            info['hls_formats'], err = yt_data_extract.extract_hls_formats(manifest)
            if not err:
                info['playability_error'] = None
            for fmt in info['hls_formats']:
                fmt['video_quality'] = video_quality_string(fmt)
        else:
            info['hls_formats'] = []

        # check for 403. Unnecessary for tor video routing b/c ip address is same
        info['invidious_used'] = False
        info['invidious_reload_button'] = False
        info['tor_bypass_used'] = False
        if (settings.route_tor == 1
                and info['formats'] and info['formats'][0]['url']):
            try:
                response = util.head(info['formats'][0]['url'],
                                     report_text='Checked for URL access')
            except urllib3.exceptions.HTTPError:
                print('Error while checking for URL access:\n')
                traceback.print_exc()
                return info

            if response.status == 403:
                print('Access denied (403) for video urls.')
                print('Routing video through Tor')
                info['tor_bypass_used'] = True
                for fmt in info['formats']:
                    fmt['url'] += '&use_tor=1'
            elif 300 <= response.status < 400:
                print('Error: exceeded max redirects while checking video URL')
        return info


def video_quality_string(format):