~cedric/pyAggr3g470r

ref: 239ad36ce28b6adb0aa11cd5ed6994fdd59c7ecd pyAggr3g470r/sqlite2mongo.py -rw-r--r-- 2.5 KiB
239ad36c — cedricbonhomme MongoDB database stable: merging MongoDB branch in the default branch. 10 years ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#! /usr/bin/env python
# -*- coding: utf-8 -*-

import hashlib
import sqlite3

import mongodb

# Path of the SQLite database read by sqlite2mongo(). It is a relative path,
# so it is resolved against the current working directory of the process.
SQLITE_BASE = "./var/feed.db"


def sqlite2mongo():
    """
    Copy the feeds and articles stored in the SQLite base to MongoDB.

    Every row of the ``feeds`` table is registered with
    ``mongo.add_collection``; the rows of ``articles`` whose ``feed_link``
    matches that feed are then sent in one batch with
    ``mongo.add_articles``. Feed and article identifiers are the SHA-1
    hex digest of the corresponding link. Columns are read by position.

    The migration is best effort: if the SQLite base cannot be opened or
    the ``feeds`` table cannot be read, nothing is migrated, and a feed
    whose articles cannot be read is skipped. Errors raised by the MongoDB
    layer are not caught. Returns None.
    """
    mongo = mongodb.Articles()

    try:
        conn = sqlite3.connect(SQLITE_BASE, isolation_level=None)
    except sqlite3.Error:
        # Best effort: without a readable SQLite base there is nothing to do.
        return

    try:
        c = conn.cursor()
        try:
            list_of_feeds = c.execute("SELECT * FROM feeds").fetchall()
        except sqlite3.Error:
            # Missing or unreadable ``feeds`` table: nothing to migrate.
            list_of_feeds = []

        # Walk through the list of feeds
        for feed in list_of_feeds:
            feed_link = feed[2]
            if feed_link is None:
                # A feed without a link can neither be queried nor hashed
                # into a feed_id, so it is skipped.
                continue

            try:
                # Bound parameter instead of string concatenation: links
                # containing quotes no longer break (or alter) the query.
                list_of_articles = c.execute(
                        "SELECT * FROM articles WHERE feed_link=?",
                        (feed_link,)).fetchall()
            except sqlite3.Error:
                continue

            feed_id = hashlib.sha1(feed_link.encode('utf-8')).hexdigest()

            new_collection = {"feed_id": feed_id.encode('utf-8'),
                              "type": 0,
                              "feed_image": feed[3].encode('utf-8'),
                              "feed_title": feed[0].encode('utf-8'),
                              "feed_link": feed_link.encode('utf-8'),
                              "site_link": feed[1].encode('utf-8'),
                              # Flags are compared against the string "1".
                              "mail": feed[4] == "1"}

            mongo.add_collection(new_collection)

            if not list_of_articles:
                continue

            # Walk through the list of articles for the current feed.
            articles = []
            for row in list_of_articles:
                article_id = hashlib.sha1(row[2].encode('utf-8')).hexdigest()

                articles.append({
                    "article_id": article_id.encode('utf-8'),
                    "type": 1,
                    "article_date": row[0].encode('utf-8'),
                    "article_link": row[2].encode('utf-8'),
                    "article_title": row[1].encode('utf-8'),
                    "article_content": row[3].encode('utf-8'),
                    "article_readed": row[4] == "1",
                    "article_like": row[6] == "1",
                })

            mongo.add_articles(articles, feed_id)

        c.close()
    finally:
        # Always release the SQLite connection, even if MongoDB raised.
        conn.close()

# Run the migration only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    sqlite2mongo()