~jack/msrss

fd3740f998cd5d12f31dc1de787dd21d6301ee0a — Jack Kelly 7 years ago 1b76c0d
Move to Python 3.
2 files changed, 14 insertions(+), 21 deletions(-)

M configure.ac
M msrss.in
M configure.ac => configure.ac +1 -1
@@ -9,7 +9,7 @@ AM_INIT_AUTOMAKE([1.11 foreign -Wall -Werror])
AM_SILENT_RULES([yes])

# Checks for programs.
-AM_PATH_PYTHON
+AM_PATH_PYTHON([3])

# Checks for libraries.
AX_PYTHON_MODULE([PyRSS2Gen], [1])

M msrss.in => msrss.in +13 -20
@@ -19,19 +19,19 @@
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import argparse
-import cPickle as pickle
import feedparser
import hashlib
import itertools
import logging
import operator
import os
+import pickle
import PyRSS2Gen as rss2
import re
import sys
import time
-import urlparse
-import urllib
+import urllib.parse as urlparse

feedparser.USER_AGENT = 'MsRSS/@PACKAGE_VERSION@ +http://code.google.com/p/msrss'



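For context, the import hunk above swaps Python 2 module names for their Python 3 equivalents; a minimal sketch of the mapping (the feed URL below is a made-up example, not from the code):

    import pickle                     # Python 3 folds cPickle into pickle
    import urllib.parse as urlparse   # urlsplit/quote/unquote now live here

    parts = urlparse.urlsplit('http://example.org/feed?x=1')
    print(parts.netloc)               # 'example.org'
    print(urlparse.quote('a b'))      # 'a%20b' (urllib.quote in Python 2)
    blob = pickle.dumps({'etag': None})   # same pickle API as before
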
@@ -57,7 +57,7 @@ def load_feed(url, data_dir):
    ETags and the Last-Modified header are both used to prevent
    duplicate downloads. The cache file name is the SHA1sum of the
    URL.'''
-    cache_file = os.path.join(data_dir, hashlib.sha1(url).hexdigest())
+    cache_file = os.path.join(data_dir, hashlib.sha1(url.encode()).hexdigest())
    old_feed = None
    if os.path.exists(cache_file):
        with open(cache_file, 'rb') as f:
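
The added .encode() matters because Python 3's hashlib only hashes bytes, not str; a small illustration (the URL is an arbitrary example):

    import hashlib

    url = 'http://example.org/feed.xml'   # arbitrary example URL
    try:
        hashlib.sha1(url)                 # Python 3 rejects str input
    except TypeError:
        pass                              # str must be encoded before hashing
    cache_name = hashlib.sha1(url.encode()).hexdigest()
    print(cache_name)                     # hex digest used as the cache file name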


@@ -99,6 +99,7 @@ def fix_url(url):
    """
    stolen shamelessly from
    https://github.com/HBehrens/feedsanitizer/blob/master/misc.py
+    and then adapted for python3.

    Copyright (c) 2011 Heiko Behrens



@@ -125,43 +126,35 @@ def fix_url(url):
    adapted from
    http://stackoverflow.com/questions/804336/best-way-to-convert-a-unicode-url-to-ascii-utf-8-percent-escaped-in-python/804380#804380
    """
-    # turn string into unicode
-    if not isinstance(url,unicode):
-        url = url.decode('utf8')

    # parse it
    parsed = urlparse.urlsplit(url)
+    scheme = parsed.scheme

    # divide the netloc further
    userpass,at,hostport = parsed.netloc.partition('@')
    if not hostport:
        hostport, userpass = userpass, hostport

-    user,colon1,pass_ = userpass.partition(':')
-    host,colon2,port = hostport.partition(':')
+    user,_,pass_ = userpass.partition(':')
+    host,_,port = hostport.partition(':')

    # encode each component
-    scheme = parsed.scheme.encode('utf8')
-    user = urllib.quote(user.encode('utf8'))
-    colon1 = colon1.encode('utf8')
-    pass_ = urllib.quote(pass_.encode('utf8'))
-    at = at.encode('utf8')
+    user = urlparse.quote(user.encode('utf8'))
+    pass_ = urlparse.quote(pass_.encode('utf8'))
    host = host.encode('idna').lower()
-    colon2 = colon2.encode('utf8')
-    port = port.encode('utf8')
    path = '/'.join(  # could be encoded slashes!
-        urllib.quote(urllib.unquote(pce).encode('utf8'),'')
+        urlparse.quote(urlparse.unquote(pce).encode('utf8'),'')
        for pce in parsed.path.split('/')
    )
    if not path:
        path = "/"
-    #query = urllib.quote(urllib.unquote(parsed.query).encode('utf8'),'=&?/')
-    #fragment = urllib.quote(urllib.unquote(parsed.fragment).encode('utf8'))

    query = parsed.query
    fragment = parsed.fragment

    # put it back together
-    netloc = ''.join((user,colon1,pass_,at,host,colon2,port))
+    netloc = ''.join((user,':',pass_,at,host.decode(),':',port))
    return urlparse.urlunsplit((scheme,netloc,path,query,fragment))

def merge_items(feeds):
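
A quick sketch of why only the host needs a .decode() in the rebuilt netloc: urllib.parse.quote returns str in Python 3, while the 'idna' codec still returns bytes (the user and host values below are made-up examples):

    import urllib.parse as urlparse

    user = urlparse.quote('jörg'.encode('utf8'))    # str: 'j%C3%B6rg'
    host = 'Bücher.example'.encode('idna').lower()  # bytes: b'xn--bcher-kva.example'
    # str.join() cannot mix str and bytes, so the host is decoded before joining:
    print(''.join((user, '@', host.decode())))      # 'j%C3%B6rg@xn--bcher-kva.example'

The ':' and '@' separators are plain str literals here, which is why the colon1/colon2 variables could be dropped from the join.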


@@ -185,7 +178,7 @@ def rfc2822(tm):
class PrintVersion(argparse.Action):
    def __call__(self, parser, namespace, values, option_string=None):
        print ('@PACKAGE_NAME@ @PACKAGE_VERSION@\n'
-               + 'Copyright (C) 2013 Jack Kelly.\n'
+               + 'Copyright (C) 2014 Jack Kelly.\n'
               + ('License GPLv3+: GNU GPL version 3 or later'
                  + ' <http://gnu.org/licenses/gpl.html>.\n')
               + ('This is free software:'