~cedric/newspipe

ref: f4b3f421cecf7094aaa4ec1cfa8b820fd2e0b525 newspipe/source/mongodb.py -rw-r--r-- 10.3 KiB
f4b3f421 — Cédric Bonhomme Updated documentation: new link for the example of the HTML auto-generated archive. 8 years ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
#! /usr/bin/env python
# -*- coding: utf-8 -*-

# pyAggr3g470r - A Web based news aggregator.
# Copyright (C) 2010-2013  Cédric Bonhomme - http://cedricbonhomme.org/
#
# For more information : http://bitbucket.org/cedricbonhomme/pyaggr3g470r/
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>

# Module metadata. The "$Revision$" / "$Date$" markers are kept verbatim;
# note that __revision__ holds a "$Date$" keyword rather than a revision
# number.
__author__ = "Cedric Bonhomme"
__version__ = "$Revision: 0.6 $"
__date__ = "$Date: 2012/03/03 $"
__revision__ = "$Date: 2013/02/01 $"
__copyright__ = "Copyright (c) Cedric Bonhomme"
__license__ = "GPLv3"

import pymongo

class Articles(object):
    """
    Wrapper around a MongoDB database that stores feeds and articles.

    Every feed has its own collection, named after the string form of the
    feed identifier. Inside a collection, feed documents are queried with
    "type" 0 and article documents with "type" 1.
    """
    def __init__(self, url='localhost', port=27017, db_name="pyaggr3g470r", user="", password=""):
        """
        Instantiates the connection.

        url, port: location of the MongoDB server.
        db_name: name of the database to use.
        user, password: credentials; authentication is only attempted
        when a user name is given.
        """
        self.db_name = db_name
        self.connection = pymongo.connection.Connection(url, port)
        self.db = pymongo.database.Database(self.connection, self.db_name)
        # The default (empty) credentials mean "no authentication":
        # do not send an authentication request that can only fail.
        if user:
            self.db.authenticate(user, password)

    @staticmethod
    def _build_query(doc_type, condition=None):
        """
        Build the query selecting documents of type 'doc_type',
        optionally restricted by 'condition', a (field, value) pair.
        """
        query = {"type": doc_type}
        if condition is not None:
            query[condition[0]] = condition[1]
        return query

    def _feed_collection_names(self):
        """
        Return the names of the collections of the database, without
        MongoDB's own "system.*" collections (e.g. "system.indexes").
        """
        return [name for name in self.db.collection_names()
                if not name.startswith("system.")]

    def add_collection(self, new_collection):
        """
        Creates a new collection for a new feed.

        'new_collection' is the feed document; its "feed_id" value gives
        the name of the collection. The document is stored in the new
        collection after the indexes have been created.
        """
        collection = self.db[str(new_collection["feed_id"])]
        # Index options must be passed as keyword arguments: the second
        # positional parameter of create_index()/ensure_index() is not an
        # options dictionary nor a sort direction.
        collection.create_index([("article_date", pymongo.DESCENDING)],
                                unique=False, sparse=False)
        collection.ensure_index([("article_content", pymongo.ASCENDING)])
        collection.insert(new_collection)

    def add_articles(self, articles, feed_id):
        """
        Add article(s) in a collection.

        An article is only inserted when no document with the same
        "article_id" is already present in the collection of the feed.
        """
        collection = self.db[str(feed_id)]
        for article in articles:
            if collection.find_one({"article_id": article["article_id"]}) is None:
                collection.insert(article)

    def delete_feed(self, feed_id):
        """
        Delete a collection (feed with all articles).
        """
        # Collections are named after the string form of the feed id.
        self.db.drop_collection(str(feed_id))

    def delete_article(self, feed_id, article_id):
        """
        Delete an article.
        """
        collection = self.db[str(feed_id)]
        collection.remove(spec_or_id={"article_id": article_id}, safe=True)

    def get_feed(self, feed_id):
        """
        Return information about a feed (collection).
        Return None if the collection does not exist or holds no
        feed document.
        """
        # Select the feed document explicitly: the collection also holds
        # the articles, and the order of an unfiltered find() is not
        # guaranteed to put the feed document first.
        return next(self.db[str(feed_id)].find({"type": 0}), None)

    def get_all_feeds(self, condition=None):
        """
        Return all feeds object. The returned list
        is sorted alphabetically (by feed title, case-insensitive).

        'condition' is an optional (field, value) pair which the feed
        documents must match.
        """
        query = self._build_query(0, condition)
        feeds = []
        for collection_name in self._feed_collection_names():
            feed = self.db[collection_name].find_one(query)
            if feed is not None:
                feeds.append(feed)
        feeds.sort(key=lambda elem: elem['feed_title'].lower())
        return feeds

    def get_articles(self, feed_id=None, article_id=None, condition=None, limit=1000000000):
        """
        Return one or several articles.
        The parameter "condition" is an optional requirement, for example:
        get_articles(feed_id, condition=("article_readed", False)) will
        return all unread articles of the feed 'feed_id'.

        - no feed_id, no article_id: list of the articles of all feeds
          ('limit' applies to each collection separately);
        - feed_id only: cursor on the articles of the feed, most recent
          first;
        - feed_id and article_id: the article, or False when it is not
          found ('condition' and 'limit' are ignored);
        - article_id only: None, an article can not be looked up without
          its feed.
        """
        if feed_id is None:
            if article_id is not None:
                return None
            # Return all articles.
            query = self._build_query(1, condition)
            articles = []
            for collection_name in self._feed_collection_names():
                articles.extend(self.db[collection_name].find(query, limit=limit))
            return articles

        collection = self.db[str(feed_id)]
        if article_id is None:
            # Return all the articles of a collection.
            cursor = collection.find(self._build_query(1, condition), limit=limit)
            return cursor.sort([("article_date", pymongo.DESCENDING)])

        # Return a precise article (False when the cursor is empty).
        return next(collection.find({"article_id": article_id}), False)

    def get_favorites(self, feed_id=None):
        """
        Return favorites articles.

        With a feed_id: cursor on the liked articles of the feed, most
        recent first. Without: list of the liked articles of all feeds.
        """
        if feed_id is not None:
            # only for a feed
            collection = self.db[str(feed_id)]
            cursor = collection.find({'type': 1, 'article_like': True})
            return cursor.sort([("article_date", pymongo.DESCENDING)])

        favorites = []
        for collection_name in self._feed_collection_names():
            favorites.extend(self.get_favorites(collection_name))
        return favorites

    def nb_articles(self, feed_id=None):
        """
        Return the number of articles of a feed
        or of all the database.
        """
        if feed_id is not None:
            return self.db[str(feed_id)].find({'type': 1}).count()
        return sum(self.nb_articles(collection_name)
                   for collection_name in self._feed_collection_names())

    def nb_unread_articles(self, feed_id=None):
        """
        Return the number of unread articles of a feed
        or of all the database.
        """
        if feed_id is not None:
            return self.get_articles(feed_id=feed_id, condition=("article_readed", False)).count()
        # Let the server count per feed instead of loading every unread
        # article in memory only to measure the length of the list.
        return sum(self.nb_unread_articles(collection_name)
                   for collection_name in self._feed_collection_names())

    def like_article(self, like, feed_id, article_id):
        """
        Like or unlike an article.

        'like' is the new value of the "article_like" field.
        """
        collection = self.db[str(feed_id)]
        collection.update({"article_id": article_id}, {"$set": {"article_like": like}})

    def nb_favorites(self, feed_id=None):
        """
        Return the number of favorites articles of a feed
        or of all the database.
        """
        if feed_id is not None:
            return self.get_favorites(feed_id).count()
        # Count per feed rather than fetching all the favorite articles.
        return sum(self.nb_favorites(collection_name)
                   for collection_name in self._feed_collection_names())

    def nb_mail_notifications(self):
        """
        Return the number of subscribed feeds, i.e. the number of feed
        documents whose "mail" field is True.
        """
        return sum(self.db[collection_name].find({'type': 0, 'mail': True}).count()
                   for collection_name in self._feed_collection_names())

    def mark_as_read(self, readed, feed_id=None, article_id=None):
        """
        Set the "article_readed" field to 'readed'.

        - feed_id and article_id: for this article only;
        - feed_id only: for all the articles of the feed;
        - no feed_id: for all the articles of all the feeds
          ('article_id' is ignored in this case).

        Only the documents whose field currently holds the opposite
        value are selected by the update.
        """
        if feed_id is None:
            for collection_name in self._feed_collection_names():
                self.mark_as_read(readed, collection_name, None)
            return

        collection = self.db[str(feed_id)]
        changes = {"$set": {"article_readed": readed}}
        if article_id is not None:
            collection.update({"article_id": article_id, "article_readed": not readed}, changes)
        else:
            collection.update({"type": 1, "article_readed": not readed}, changes, multi=True)

    def update_feed(self, feed_id, changes):
        """
        Update a feed.

        'changes' is a dictionary of the fields to set on the feed
        document(s) of the collection.
        """
        collection = self.db[str(feed_id)]
        collection.update({"type": 0, "feed_id": feed_id}, {"$set": changes}, multi=True)

    def full_search(self, term):
        """
        Case-insensitive search of 'term' through the content of the
        articles of all feeds.

        Return a dictionary mapping the identifier of each feed with at
        least one match to a cursor on the matching documents, most
        recent first.

        NOTE: 'term' is handed to MongoDB as a regular expression, it is
        not escaped. Escape it beforehand if it comes from an untrusted
        source or must be matched literally.
        """
        articles = {}
        for feed in self.get_all_feeds():
            result = self.db[str(feed["feed_id"])].find(
                {'article_content': {'$regex': term, "$options": 'i'}})
            if result.count() != 0:
                articles[feed["feed_id"]] = result.sort([("article_date", pymongo.DESCENDING)])
        return articles

    # Functions on database
    def drop_database(self):
        """
        Drop all the database
        """
        self.connection.drop_database(self.db_name)


if __name__ == "__main__":
    # Point of entry in execution mode: small manual smoke test.
    # Articles() connects to the MongoDB server with its default settings.
    articles = Articles()

    # Create a collection for a stream.
    # Articles.add_collection() names the collection after the "feed_id"
    # value, and the feed queries of Articles select on "type": 0.
    collection_dic = {
        "feed_id": "42",
        "type": 0,
        "feed_image": "Image",
        "feed_title": "Title",
        "feed_link": "Link",
        "site_title": "Site link",
        "mail": True,
    }
    #articles.add_collection(collection_dic)

    # Add an article in the newly created collection.
    # The article queries of Articles select on "type": 1.
    article_dic1 = {
        "type": 1,
        "article_id": 51,
        "article_date": "Today",
        "article_link": "Link of the article",
        "article_title": "The title",
        "article_content": "The content of the article",
        "article_readed": True,
        "article_like": True,
    }
    article_dic2 = {
        "type": 1,
        "article_id": 52,
        "article_date": "Yesterday",
        "article_link": "Link",
        "article_title": "Hello",
        "article_content": "The content of the article",
        "article_readed": True,
        "article_like": True,
    }

    #articles.add_articles([article_dic1, article_dic2], 42)

    print("All articles:")
    #print(articles.get_articles())


    # Drop the database
    #articles.drop_database()