~bronikowski/radiogopher

bb4d0980dad80279523bb17162ea0e7eace4325a — Emil Oppeln-Bronikowski 1 year, 2 months ago ed1f1ae
Faux user-agents and recording of last update
4 files changed, 29 insertions(+), 3 deletions(-)

M __main__.py
M collector.py
M feeds.py
M requirements.txt
M __main__.py => __main__.py +1 -1
@@ 3,7 3,7 @@ import sys
import collector
import exceptions

for feed in collector.feeds():
for feed in collector.get_feeds():

    try:
        feed = collector.parse_feed(feed)

M collector.py => collector.py +19 -2
@@ 7,12 7,16 @@ import os
import progressbar
import filetype
import config
import feeds
from fake_useragent import UserAgent

widgets = [progressbar.Percentage(), progressbar.Bar()]

configuration = config.Application('application')
subscribed_feeds = config.Feeds('feeds')

ua = UserAgent()

def parse_feed(feed_uri):

    parsed = feedparser.parse(feed_uri)


@@ 22,13 26,21 @@ def parse_feed(feed_uri):

    return parsed

def feeds():
def get_feeds():
    """Return the keys of the 'podcasts' section of the subscribed feeds."""
    podcasts = subscribed_feeds['podcasts']
    return podcasts.keys()

def extract_media(feed):

    if not feed.entries:
        raise exceptions.NoEntries()

    print("→ {}".format(feed.feed.title))
    last_seen = feeds.last_seen(feed.href)
    if last_seen:
        if last_seen == feed.updated_parsed:
            print("Already saw everything")
            return True

    for entry in feed.entries:
        if not entry.links:
            continue


@@ 36,13 48,18 @@ def extract_media(feed):
            if link['rel'] == 'enclosure' and link['type'].startswith('audio/'):
                download_media(entry['id'], link['href'], entry['title'])
        break
    feeds.last_seen(feed.href, feed.updated_parsed)

def download_media(media_id, media_uri, title=None):

    hashed_media_id = hashlib.sha1()
    hashed_media_id.update(media_id.encode('utf-8'))

    with requests.get(media_uri, stream=True) as resp:
    headers = {'User-Agent': ua.random}

    with requests.get(media_uri, stream=True, allow_redirects=True, headers=headers) as resp:
        if resp.status_code != 200:
            return False
        bar = progressbar.ProgressBar(
            max_value=int(resp.headers['content-length']),
            widgets=widgets

M feeds.py => feeds.py +8 -0
@@ 1,4 1,5 @@
import config
import time

subscribed_feeds = config.Feeds('feeds')



@@ 12,3 13,10 @@ def add(feed):
def remove(feed):
    """Delete *feed* from the 'podcasts' section and save the feed store.

    Whatever the underlying store raises for an unknown key is propagated.
    """
    podcasts = subscribed_feeds['podcasts']
    del podcasts[feed]
    subscribed_feeds.save()

def last_seen(feed, dt=None):
    """Record or look up the 'last_checked' value stored for *feed*.

    With *dt* given, store it under the feed's 'last_checked' key and save
    the feed store; nothing is returned.

    Without *dt*, return the stored value wrapped in ``time.struct_time``,
    or ``None`` when the feed has no 'last_checked' entry yet, so callers
    can treat "never checked" as falsy instead of handling ``KeyError``.

    NOTE(review): the stored value is presumably a 9-field sequence (what
    ``time.struct_time`` accepts) -- confirm against how the store
    serialises it.
    """
    entry = subscribed_feeds['podcasts'][feed]

    if dt is not None:
        entry['last_checked'] = dt
        subscribed_feeds.save()
        return None

    try:
        recorded = entry['last_checked']
    except KeyError:
        # First time this feed is processed: nothing has been recorded yet.
        return None
    return time.struct_time(recorded)

M requirements.txt => requirements.txt +1 -0
@@ 4,3 4,4 @@ requests
progressbar2
filetype
appdirs
fake-useragent