~cedric/pyAggr3g470r

901fbd154f16268ca4c9d10af8d038d684c8c4f4 — cedricbonhomme 11 years ago 26d44b3
Porting to Python 3.2. Better, faster, stronger.
M .hgsubstate => .hgsubstate +1 -1
@@ -1,1 +1,1 @@
-95fa18e08f277df2f4d03da06e0fe8c7c8cd4e8e source/qrcode
+43105ca671ddb7b856866a4e532dc2f21aa13290 source/qrcode

M source/auth.py => source/auth.py +3 -3
@@ -62,11 +62,11 @@ def check_credentials(username, password):
         USERS[row[0]] = row[1]
 
     m = hashlib.sha1()
-    m.update(password)
-    if username in USERS.keys() and USERS[username] == m.hexdigest():
+    m.update(password.encode())
+    if username in list(USERS.keys()) and USERS[username] == m.hexdigest():
        return None
    else:
-        return u"Incorrect username or password."
+        return "Incorrect username or password."
 
     # An example implementation which uses an ORM could be:
     # u = User.get(username)
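Why the `encode()` matters: in Python 3, `hashlib` digest objects accept only bytes, so the plain-text password must be encoded before hashing. A minimal sketch (the password value is illustrative):

    import hashlib

    m = hashlib.sha1()
    m.update("secret".encode())  # str must become bytes; m.update("secret") raises TypeError
    print(m.hexdigest())

The `list(USERS.keys())` wrapper is a mechanical 2to3 artifact; plain `username in USERS` would behave identically.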

M source/cfg/cherrypy.cfg => source/cfg/cherrypy.cfg +1 -1
@@ -21,4 +21,4 @@ tools.staticdir.match = "(?i)^.+\.css$"
 [/images]
 tools.staticdir.on = True
 tools.staticdir.dir = "img"
-tools.staticdir.match = "(?i)^.+\.png$"
\ No newline at end of file
+tools.staticdir.match = "(?i)^.+\.png$"

M source/conf.py => source/conf.py +2 -2
@@ -28,9 +28,9 @@ __license__ = "GPLv3"
 
 
 import os
-import ConfigParser
+import configparser
 # load the configuration
-config = ConfigParser.SafeConfigParser()
+config = configparser.SafeConfigParser()
 try:
     config.read("./cfg/pyAggr3g470r.cfg")
 except:
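The module was renamed from `ConfigParser` to `configparser` in Python 3; the classes are unchanged, though as of Python 3.2 `SafeConfigParser` is a deprecated alias for `ConfigParser`. A minimal sketch, assuming the same configuration layout:

    import configparser

    config = configparser.ConfigParser()    # SafeConfigParser is a deprecated alias since 3.2
    config.read("./cfg/pyAggr3g470r.cfg")   # read() silently skips missing files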

M source/export.py => source/export.py +3 -3
@@ -160,8 +160,8 @@ def export_pdf(feeds):
     Export the articles given in parameter in PDF files.
     """
     from xhtml2pdf import pisa
-    import cStringIO as StringIO
-    for feed in feeds.values():
+    import io as StringIO
+    for feed in list(feeds.values()):
             # creates folder for each stream
             folder = utils.path + "/var/export/pdf/" + \
                     utils.normalize_filename(feed.feed_title.strip().replace(':', '').lower())
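`cStringIO` no longer exists in Python 3; the `io` module takes its place, split into `io.StringIO` for text and `io.BytesIO` for binary data. Since a PDF is binary, `io.BytesIO` is presumably the buffer pisa ultimately needs here. The distinction in brief:

    import io

    text_buf = io.StringIO()
    text_buf.write("hello")      # str only

    pdf_buf = io.BytesIO()
    pdf_buf.write(b"%PDF-1.4")   # bytes only, what a PDF generator emits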


@@ -171,7 +171,7 @@ def export_pdf(feeds):
                 # directories already exists (not a problem)
                 pass
 
-            for article in feed.articles.values():
+            for article in list(feed.articles.values()):
                 name = article.article_date.strip().replace(' ', '_')
                 name = os.path.normpath(folder + "/" + name + ".pdf")
 

M source/feedgetter.py => source/feedgetter.py +5 -5
@@ -29,7 +29,7 @@ __license__ = "GPLv3"
 import hashlib
 import threading
 import feedparser
-from BeautifulSoup import BeautifulSoup
+from bs4 import BeautifulSoup
 
 from datetime import datetime



@@ -105,9 +105,9 @@ class FeedGetter(object):
         collection_dic = {"feed_id": feed_id, \
                             "type": 0, \
                             "feed_image": feed_image, \
-                            "feed_title": utils.clear_string(a_feed.feed.title.encode('utf-8')), \
+                            "feed_title": utils.clear_string(a_feed.feed.title), \
                             "feed_link": feed_link, \
-                            "site_link": a_feed.feed.link.encode('utf-8'), \
+                            "site_link": a_feed.feed.link, \
                             "mail": False \
                         }



@@ -140,7 +140,7 @@ class FeedGetter(object):
             article = {"article_id": article_id, \
                     "type":1, \
                     "article_date": post_date, \
-                    "article_link": article.link.encode('utf-8'), \
+                    "article_link": article.link, \
                     "article_title": article_title, \
                     "article_content": description, \
                     "article_readed": False, \


@@ -153,7 +153,7 @@ class FeedGetter(object):
 
         # send new articles by e-mail if desired.
         #threading.Thread(None, utils.send_mail, None, (conf.mail_from, conf.mail_to, \
-                            #a_feed.feed.title.encode('utf-8'), \
+                            #a_feed.feed.title, \
                             #article_title, description) \
                         #).start()
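All the dropped `.encode('utf-8')` calls in this file follow the same logic: under Python 3, feedparser already returns `str` (Unicode) values, and encoding them would produce `bytes` objects that break string handling downstream. Roughly:

    title = "Café"                # what feedparser yields in Python 3: str
    print(title.upper())          # str operations work directly
    raw = title.encode('utf-8')   # b'Caf\xc3\xa9' — bytes, the wrong type for templating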


M source/mongodb.py => source/mongodb.py +4 -4
@@ -78,7 +78,7 @@ class Articles(object):
         """
         Return information about a feed.
         """
-        return self.db[str(feed_id)].find().next()
+        return next(self.db[str(feed_id)].find())
 
     def get_all_feeds(self, condition=None):
         """


@@ -93,7 +93,7 @@ class Articles(object):
                 else:
                     cursor = self.db[collection_name].find({"type":0, condition[0]:condition[1]})
                 if cursor.count() != 0:
-                    feeds.append(cursor.next())
+                    feeds.append(next(cursor))
         feeds.sort(key = lambda elem: elem['feed_title'].lower())
         return feeds



@@ -114,7 +114,7 @@ class Articles(object):
         Get an article of a specified feed.
         """
         collection = self.db[str(feed_id)]
-        return collection.find({"article_id":article_id}).next()
+        return next(collection.find({"article_id":article_id}))
 
     def get_articles_from_collection(self, feed_id, condition=None, limit=1000000000):
         """


@@ -270,7 +270,7 @@ if __name__ == "__main__":
 
     #articles.add_articles([article_dic1, article_dic2], 42)
 
-    print "All articles:"
+    print("All articles:")
     #print articles.get_all_articles()
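Every `.next()` call in this file becomes the `next()` builtin: Python 3 renamed the iterator method to `__next__`, and `next(iterator)` is the portable spelling (it also works on Python 2.6+). pymongo cursors implement the iterator protocol, so `next(cursor)` fetches the first matching document:

    it = iter([1, 2, 3])
    print(next(it))   # 1 — works on Python 2.6+ and 3
    # it.next()       # AttributeError under Python 3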



M source/pyAggr3g470r.py => source/pyAggr3g470r.py +12 -11
@@ -52,8 +52,8 @@ import export
 import mongodb
 import feedgetter
 from auth import AuthController, require, member_of, name_is
-from qrcode.pyqrnative.PyQRNative import QRCode, QRErrorCorrectLevel, CodeOverflowException
-from qrcode import qr
+#from qrcode.pyqrnative.PyQRNative import QRCode, QRErrorCorrectLevel, CodeOverflowException
+#from qrcode import qr
 
 
 def error_page_404(status, message, traceback, version):


@@ -193,7 +193,7 @@ class pyAggr3g470r(object):
 
                 # Descrition for the CSS ToolTips
                 article_content = utils.clear_string(article["article_content"])
-                if article_content:
+                if False:
                     description = " ".join(article_content.split(' ')[:55])
                 else:
                     description = "No description."


@@ -407,7 +407,7 @@ class pyAggr3g470r(object):
 
                         # descrition for the CSS ToolTips
                         article_content = utils.clear_string(article["article_content"])
-                        if article_content:
+                        if False:
                             description = " ".join(article_content[:500].split(' ')[:-1])
                         else:
                             description = "No description."


@@ -481,7 +481,7 @@ class pyAggr3g470r(object):
             html += description + "\n<br /><br /><br />"
         else:
             html += "No description available.\n<br /><br /><br />"
-
+        """
         # Generation of the QR Code for the current article
         try:
             os.makedirs("./var/qrcode/")


@@ -496,19 +496,20 @@ class pyAggr3g470r(object):
                 f = qr.QRUrl(url = "URL too long.")
                 f.make()
             f.save("./var/qrcode/"+article_id+".png")
+        """
 
         # Previous and following articles
         previous, following = None, None
         liste = self.mongo.get_articles_from_collection(feed_id)
         for current_article in self.mongo.get_articles_from_collection(feed_id):
-            articles.next()
+            next(articles)
             if current_article["article_id"] == article_id:
                 break
             following = current_article
         if following is None:
             following = liste[liste.count()-1]
         try:
-            previous = articles.next()
+            previous = next(articles)
         except StopIteration:
             previous = liste[0]



@@ -926,7 +927,7 @@ class pyAggr3g470r(object):
             query_string = "year:" + the_year + "-month"
         if "month" not in query:
             html += '<div style="width: 35%; overflow:hidden; text-align: justify">' + \
-                        utils.tag_cloud([(elem, timeline[elem]) for elem in timeline.keys()], query_string) + '</div>'
+                        utils.tag_cloud([(elem, timeline[elem]) for elem in list(timeline.keys())], query_string) + '</div>'
         html += '<br /><br /><h1>Search with a month+year picker</h1>\n'
         html += '<form>\n\t<input name="m" type="month">\n\t<input type="submit" value="Go">\n</form>'
         html += '<hr />'


@@ -1217,8 +1218,8 @@ class pyAggr3g470r(object):
         getattr(export, export_method)(self.mongo)
         try:
             getattr(export, export_method)(self.mongo)
-        except Exception, e:
-            print e
+        except Exception as e:
+            print(e)
             return self.error_page(e)
         return self.management()
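Two Python 3 syntax changes meet in this hunk: the comma form of exception binding is gone (only the `as` form parses), and `print` is a function rather than a statement:

    try:
        raise ValueError("boom")
    except Exception as e:   # 'except Exception, e:' is a SyntaxError in Python 3
        print(e)             # print() requires parentheses in Python 3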



@@ -1231,7 +1232,7 @@ class pyAggr3g470r(object):
         """
         try:
             from epub import ez_epub
-        except Exception, e:
+        except Exception as e:
             return self.error_page(e)
         try:
             feed_id, article_id = param.split(':')

M source/utils.py => source/utils.py +19 -19
@@ -37,18 +37,18 @@ __license__ = "GPLv3"
 import os
 import re
 import operator
-import urlparse
+import urllib.parse
 import calendar
 import unicodedata
-import htmlentitydefs
+import html.entities
 
 import smtplib
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText
 
-import urllib2
-import BaseHTTPServer
-from BeautifulSoup import BeautifulSoup
+import urllib.request, urllib.error, urllib.parse
+import http.server
+from bs4 import BeautifulSoup
 
 from datetime import datetime
 from collections import Counter
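Most of the churn in this file is the Python 3 standard-library reshuffle: `urlparse` and `urllib2` merged into the `urllib` package (`urllib.parse`, `urllib.request`, `urllib.error`), `htmlentitydefs` became `html.entities`, `BaseHTTPServer` became `http.server`, and BeautifulSoup 4 is imported from `bs4`. The mapping, as used below:

    # Python 2                  ->  Python 3
    # urlparse.urljoin              urllib.parse.urljoin
    # urllib2.Request               urllib.request.Request
    # urllib2.urlopen               urllib.request.urlopen
    # urllib2.HTTPError/URLError    urllib.error.HTTPError/URLError
    # htmlentitydefs                html.entities
    # BaseHTTPServer                http.server
    # BeautifulSoup                 bs4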


@@ -70,14 +70,14 @@ def detect_url_errors(list_of_urls):
     """
     errors = []
     for url in list_of_urls:
-        req = urllib2.Request(url)
+        req = urllib.request.Request(url)
         try:
-            urllib2.urlopen(req)
-        except urllib2.HTTPError, e:
+            urllib.request.urlopen(req)
+        except urllib.error.HTTPError as e:
             # server couldn't fulfill the request
             errors.append((url, e.code, \
-                BaseHTTPServer.BaseHTTPRequestHandler.responses[e.code][1]))
-        except urllib2.URLError, e:
+                http.server.BaseHTTPRequestHandler.responses[e.code][1]))
+        except urllib.error.URLError as e:
             # failed to reach the server
             errors.append((url, e.reason.errno ,e.reason.strerror))
     return errors
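`BaseHTTPRequestHandler.responses` survives the move to `http.server`: it still maps status codes to `(short message, long message)` pairs, so the lookup logic is unchanged. For example:

    import http.server

    # index 1 is the long description, e.g. 'Nothing matches the given URI'
    print(http.server.BaseHTTPRequestHandler.responses[404][1])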


@@ -87,9 +87,9 @@ def clear_string(data):
     Clear a string by removing HTML tags, HTML special caracters
     and consecutive white spaces (more that one).
     """
-    p = re.compile(r'<[^<]*?/?>') # HTML tags
-    q = re.compile(r'\s') # consecutive white spaces
-    return p.sub('', q.sub(' ', data))
+    p = re.compile(b'<[^<]*?/?>') # HTML tags
+    q = re.compile(b'\s') # consecutive white spaces
+    return p.sub(b'', q.sub(b' ', bytes(data, "utf-8"))).decode("utf-8", "strict")
 
 def unescape(text):
     """


@@ -101,15 +101,15 @@ def unescape(text):
             # character reference
             try:
                 if text[:3] == "&#x":
-                    return unichr(int(text[3:-1], 16))
+                    return chr(int(text[3:-1], 16))
                 else:
-                    return unichr(int(text[2:-1]))
+                    return chr(int(text[2:-1]))
             except ValueError:
                 pass
         else:
             # named entity
             try:
-                text = unichr(htmlentitydefs.name2codepoint[text[1:-1]])
+                text = chr(html.entities.name2codepoint[text[1:-1]])
             except KeyError:
                 pass
         return text # leave as is
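`unichr()` is gone in Python 3; `chr()` now covers the full Unicode range, and the entity table lives in `html.entities`:

    import html.entities

    print(chr(0x20AC))                                   # '€' (was unichr() in Python 2)
    print(chr(html.entities.name2codepoint["eacute"]))   # 'é'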


@@ -244,7 +244,7 @@ def change_feed_url(old_feed_url, new_feed_url):
     # Replace the URL in the text file
     with open("./var/feed.lst", "r") as f:
         lines = f.readlines()
-    lines = map(str.strip, lines)
+    lines = list(map(str.strip, lines))
     try:
         lines[lines.index(old_feed_url)] = new_feed_url
     except:
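In Python 3, `map()` returns a lazy iterator instead of a list, so its result no longer supports `.index()` or item assignment; wrapping it in `list()` restores the Python 2 behavior the following lines rely on:

    lines = list(map(str.strip, ["a\n", "b\n"]))  # map() alone is a one-shot iterator
    lines[lines.index("a")] = "c"                 # .index() and assignment need a real list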


@@ -271,7 +271,7 @@ def search_feed(url):
     """
     soup = None
     try:
-        page = urllib2.urlopen(url)
+        page = urllib.request.urlopen(url)
         soup = BeautifulSoup(page)
     except:
         return None


@@ -279,6 +279,6 @@ def search_feed(url):
     feed_links.extend(soup('link', type='application/rss+xml'))
     for feed_link in feed_links:
         if url not in feed_link['href']:
-            return urlparse.urljoin(url, feed_link['href'])
+            return urllib.parse.urljoin(url, feed_link['href'])
         return feed_link['href']
     return None