~cedric/newspipe

ae8fe47751f45d382981274ccd6d326a48a51e0c — Cédric Bonhomme 2 years ago 1915f5c
Improved the search of existing articles.
2 files changed, 12 insertions(+), 6 deletions(-)

M src/crawler/default_crawler.py
M src/web/models/article.py
M src/crawler/default_crawler.py => src/crawler/default_crawler.py +4 -2
@@ 135,8 135,10 @@ async def insert_articles(queue, nḅ_producers=1):
            new_article = await construct_article(article, feed)

            try:
-               existing_article_req = art_contr.read(feed_id=feed.id,
-                               entry_id=extract_id(article))
+               existing_article_req = art_contr.read(
+                                       user_id=user.id,
+                                       feed_id=feed.id,
+                                       entry_id=extract_id(article))
            except Exception as e:
                logger.exception("existing_article_req: " + str(e))
                continue

M src/web/models/article.py => src/web/models/article.py +8 -4
@@ 47,6 47,7 @@ class Article(db.Model, RightMixin):
    updated_date = db.Column(db.DateTime(), default=datetime.utcnow)
    retrieved_date = db.Column(db.DateTime(), default=datetime.utcnow)

    # foreign keys
    user_id = db.Column(db.Integer(), db.ForeignKey('user.id'))
    feed_id = db.Column(db.Integer(), db.ForeignKey('feed.id'))
    category_id = db.Column(db.Integer(), db.ForeignKey('category.id'))


@@ 58,10 59,13 @@ class Article(db.Model, RightMixin):
                                 foreign_keys='[ArticleTag.article_id]')
    tags = association_proxy('tag_objs', 'text')

-   # index
-   idx_article_uid = Index('user_id')
-   idx_article_uid_cid = Index('user_id', 'category_id')
-   idx_article_uid_fid = Index('user_id', 'feed_id')
+   # indexes
+   __table_args__ = (
+       Index('user_id'),
+       Index('user_id', 'category_id'),
+       Index('user_id', 'feed_id'),
+       Index('ix_article_uid_fid_eid', user_id, feed_id, entry_id)
+   )

    # api whitelists
    @staticmethod