~fabrixxm/confy

ref: 00b88b02ca719529be4807b5b272cf4a817503d2 confy/src/remotes/pentabarf.py -rw-r--r-- 5.1 KiB
00b88b02fabrixxm [WIP] Timezone support 4 months ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# pentabarf.py
#
# Copyright 2020 Fabio
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

import datetime
import time
import xml.etree.ElementTree as ET

from dateutil.tz import UTC, tzlocal

from .exceptions import InvalidFormatException
from .. import local
from ..models import Meta


def _local_to_utc_to_unset(dt):
    """Return *dt*, read as local wall-clock time, as a naive UTC datetime.

    The local timezone is stamped onto *dt* (overwriting any tzinfo it
    already carries), the value is converted to UTC, and the tzinfo is
    stripped again so the result is a plain naive datetime.
    """
    as_local = dt.replace(tzinfo=tzlocal())
    return as_local.astimezone(UTC).replace(tzinfo=None)

def _get_text(root, nodename):
    """Return the text of the first *nodename* match under *root*.

    An empty string is returned when *root* is ``None`` or when
    ``root.find(nodename)`` finds nothing.  When the node exists, its
    ``text`` attribute is returned unchanged, which is ``None`` for an
    element without text content.
    """
    if root is None:
        return ""
    child = root.find(nodename)
    return "" if child is None else child.text




def _event_links(eevent, eventid, base_url):
    """Build the ``links`` table rows for one ``<event>`` element.

    Every ``<link>`` and then every ``<attachment>`` found under *eevent*
    is turned into an ``(event_id, href, name)`` tuple:

    * ``name`` is the element text; an attachment carrying a ``type``
      attribute gets it appended as ``"text (type)"`` (or is named by the
      type alone when the element has no text).
    * ``href`` is the ``href`` attribute, falling back to the raw element
      text when the attribute is missing or empty.
    * a ``href`` starting with ``/`` is prefixed with *base_url* when one
      is set.

    Elements with neither a ``href`` nor any text are skipped: there is
    nothing to point at, and calling ``startswith`` on ``None`` would raise.
    """
    rows = []
    for tag in ('link', 'attachment'):
        for e in eevent.iter(tag):
            text = e.text
            name = text
            # Only attachments advertise their type next to the name.
            atype = e.attrib.get('type') if tag == 'attachment' else None
            if atype:
                name = '{} ({})'.format(text, atype) if text else atype
            # Fall back to the raw text, not the decorated name, so the
            # "(type)" suffix never ends up inside the target address.
            href = e.attrib.get('href') or text
            if not href:
                continue
            if base_url and href.startswith("/"):
                href = base_url + href
            rows.append((eventid, href, name))
    return rows


def import_pentabarf(xmlstr: str, url: str) -> None:
    """Import data from Pentabarf XML

    As far as I can tell, Pentabarf XML doesn't declare timezone.
    We will import dates in local timezone, then convert to UTC, then
    remove timezone info to store dates in db (because sqlite3 cries otherwise)

    Conference metadata is written through ``Meta``; events, persons,
    event/person relations, links and the full-text index are written to
    the database returned by ``local.getDb()`` and committed once, after
    all days have been processed.

    Args:
        xmlstr: the Pentabarf schedule document as a string.
        url: value stored as the ``url`` of the conference metadata.

    Raises:
        InvalidFormatException: if the root element is not ``<schedule>``.
        xml.etree.ElementTree.ParseError: if *xmlstr* is not well-formed XML.
    """
    root = ET.fromstring(xmlstr)
    if root.tag != "schedule":
        raise InvalidFormatException(_("Invalid pentabarf format"))

    econference = root.find("conference")
    base_url = _get_text(econference, "base_url")
    with Meta() as m:
        m.url = url
        m.last_update = time.time() # timestamp
        m.title = _get_text(econference, "title")
        m.start = _get_text(econference, "start")
        m.end = _get_text(econference, "end")
        m.venue = _get_text(econference, "venue")
        m.days = _get_text(econference, "days")
        m.city = _get_text(econference, "city")

    _db = local.getDb()

    for eday in root.iter('day'):
        date = eday.attrib['date']
        for eevent in eday.iter('event'):
            eventid = eevent.attrib['id']
            start = _local_to_utc_to_unset(
                datetime.datetime.strptime(date + " " + eevent.find('start').text, "%Y-%m-%d %H:%M")
            )
            # <duration> is "HH:MM"; anything after the minutes is ignored.
            duration = eevent.find('duration').text.split(":")
            end = start + datetime.timedelta(hours=int(duration[0]), minutes=int(duration[1]))
            # NOTE: this is the date of the UTC-converted start, so it can
            # differ from the <day date="..."> the event is listed under.
            evtdate = start.date()
            room = _get_text(eevent, 'room')
            slug = _get_text(eevent, 'slug')
            title = _get_text(eevent, 'title')
            subtitle = _get_text(eevent, 'subtitle')
            track = _get_text(eevent, 'track')
            evtype = _get_text(eevent, 'type')
            abstract = _get_text(eevent, 'abstract')
            description = _get_text(eevent, 'description')
            # A person without an id attribute is keyed by its own name.
            persons = [ (e.attrib.get('id', e.text), e.text) for e in eevent.iter('person') if e.text is not None ]

            # _get_text returns None for elements that exist but are empty.
            fulltextsearch = [ s for s in [title, subtitle, abstract, description, room, track] if s is not None ]
            fulltextsearch += [p[1] for p in persons]

            links = _event_links(eevent, eventid, base_url)

            # The sub-select keeps the existing "starred" flag when an
            # already imported event is replaced.
            _db.execute("""INSERT OR REPLACE INTO events
                            (id, date, start, end, room, slug, title, subtitle, track, type, abstract, description, starred)
                            VALUES (?,?,?,?,?,?,?,?,?,?,?,?, (SELECT starred FROM events WHERE id=?))""",
                            (eventid, evtdate, start, end, room, slug, title, subtitle, track, evtype, abstract, description, eventid))
            _db.executemany("INSERT OR REPLACE INTO persons (id, name) VALUES (?, ?)", persons)
            _db.executemany("""INSERT OR REPLACE INTO event_person (event_id, person_id)
                                VALUES (?, ?) ON CONFLICT DO NOTHING""",
                                [ (eventid, p[0]) for p in persons ])
            _db.executemany("""INSERT OR REPLACE INTO links
                                (event_id, href, name) VALUES (?,?,?)""", links)

            fulltextsearchstr = ' '.join(fulltextsearch)
            _db.execute("""INSERT OR REPLACE INTO fts_event (event_id, text) VALUES (?, ?)""", (eventid, fulltextsearchstr))
    _db.commit()