~josealberto4444/apodnasabot

ec56641411359f1697f9c9c7e5d45ab91b100a02 — José Alberto Orejuela García 4 years ago fa638d8
Refactor code

It creates the Apod class, which is called from every bot, along with some useful helper functions. It now seems to be a (somewhat) decent library.
1 file changed, 99 insertions(+), 59 deletions(-)
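
As an illustration (not part of the commit), a bot might use the refactored api.py roughly like this; the sketch assumes a config.cfg with the NASA_API api_key and an existing data/ directory, as the code in the diff below expects:

import api

date = '2019-07-10'
problem = api.is_invalid(date)   # error string, or False when the date parses
if problem:
    print(problem)
else:
    apod = api.Apod(date)        # with no argument it asks the API for today's APOD
    if apod.error:
        print(apod.error)        # error message propagated from the NASA API
    else:
        print(apod.title)
        print(apod.link)         # hdurl for images, url for videos
        print(apod.explanation)  # scraped HTML when available, otherwise the API's plain-text explanation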

M api.py
M api.py => api.py +99 -59
@@ -14,6 +14,8 @@
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
 import configparser
+import datetime
+import json
 import os.path
 import re
 import requests


@@ -24,71 +26,109 @@ def read_config(section, key):
     config.read('config.cfg')
     return config[section][key]
 
-def ask_api(api_key, date):
-    baseurl = 'https://api.nasa.gov/planetary/apod'
-    payload = {'api_key': api_key, 'date': date}
-    r = requests.get(baseurl, params=payload)
-    return r.json()
+def is_invalid(date):
+    try:
+        datetime.datetime.strptime(date, '%Y-%m-%d')
+    except ValueError:
+        return "Incorrect date format, should be YYYY-MM-DD."
+    else:
+        return False
 
-def save_json(filename, api_response):
-    with open(filename + '.json', 'wt') as f:
-        f.write(str(api_response))
+class Apod:
 
-def save_media(filename, api_response):
-    if api_response['media_type'] == 'image':
-        r = requests.get(api_response['hdurl'])
-        filename = filename + '.jpg'
-        with open(filename, 'wb') as f:
-            for chunk in r.iter_content(chunk_size=128):
-                f.write(chunk)
-    elif api_response['media_type'] == 'video':
-        filename = filename + '.%(ext)s'
-        ydl_opts = {'outtmpl': filename, 'quiet': True}
-        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-            ydl.download([api_response['url']])
+    def __init__(self, *args):
+        self.api_key = read_config('NASA_API', 'api_key')
+        if args:
+            self.date = args[0]
+        else:
+            self.date = ''
+            self.date = self.ask_api()['date']
+        self.filename = 'data/' + self.date
+        self.error = False
+        self.consult()
+        if not self.error:
+            self.title = self.api_response['title']
+            self.media_type = self.api_response['media_type']
+            if self.media_type == 'image':
+                self.link = self.api_response['hdurl']
+            elif self.media_type == 'video':
+                self.link = self.api_response['url']
+            self.explanation()
 
-def save_api_response(api_response):
-    filename = 'data/' + api_response['date']
-    save_json(filename, api_response)
-    save_media(filename, api_response)
+    def ask_api(self):
+        baseurl = 'https://api.nasa.gov/planetary/apod'
+        payload = {'api_key': self.api_key, 'date': self.date}
+        r = requests.get(baseurl, params=payload)
+        return r.json()
 
-def intl_to_short(date):
-    shortdate = date.replace('-', '')
-    shortdate = shortdate[2:]
-    return shortdate
+    def consult(self):
+        if os.path.exists('data/' + self.date + '.json'):
+            with open(self.filename + '.json', 'rt') as f:
+                self.api_response = json.load(f)
+        else:
+            self.api_response = self.ask_api()
+            if 'code' in self.api_response:
+                if self.api_response['code'] == 400:
+                    self.error = self.api_response['msg']
+                else:
+                    self.error = self.api_response['code'] + ': ' + self.api_response['msg']
+            else:
+                with open(self.filename + '.json', 'wt') as f:
+                    json.dump(self.api_response, f)
 
-def get_userpage(date):
-    shortdate = intl_to_short(date)
-    url = 'https://apod.nasa.gov/apod/ap' + shortdate + '.html'
-    payload = {}
-    r = requests.get(url, params=payload)
-    return r.text
+    def get_userpage(self):
+        shortdate = self.date.replace('-', '')
+        shortdate = shortdate[2:]
+        url = 'https://apod.nasa.gov/apod/ap' + shortdate + '.html'
+        payload = {}
+        r = requests.get(url, params=payload)
+        return r.text
 
-def scrap_description(pagesource):
-    re_description = re.compile("Explanation: </b> (.*?)<p>", flags=re.DOTALL)
-    description = re_description.search(pagesource).groups()[0]
-    description = description.replace('\n>', '>')
-    description = description.replace('\n', ' ')
-    description = re.sub('\s+', ' ', description).strip()
-    return description
+    def scrap_explanation(self, pagesource):
+        re_explanation = re.compile("Explanation: </b> (.*?)<p>", flags=re.DOTALL) # Compile regex for extracting explanation.
+        explanation = re_explanation.search(pagesource).groups()[0] # Extract explanation.
+        explanation = explanation.replace('/\n', '/') # Fix split URLs along several lines.
+        explanation = explanation.replace('\n>', '>') # Fix split HTML tags.
+        explanation = explanation.replace('<a/>', '</a>') # Fix typos (they seem to write the HTML by hand, yes).
+        explanation = explanation.replace('\n', ' ') # Delete all newlines.
+        explanation = re.sub('\s+', ' ', explanation).strip() # Substitute repeated spaces and strips the ones at the beginning and the end of the string.
+        explanation = re.sub(r'<a([^>]*)href=["\'](?!http)([^"\']*)["\']([^>]*)>', r'<a\1href="https://apod.nasa.gov/apod/\2"\3>', explanation) # Change relative paths to absolute.
+        return explanation
 
-def save_description(date, description):
-    filename = 'data/' + date
-    with open(filename + '.html', 'wt') as f:
-        f.write(description)
+    def save_explanation(self, explanation):
+        with open(self.filename + '.html', 'wt') as f:
+            f.write(explanation)
 
-date = '2019-07-10'
+    def explanation(self):
+        filename = self.filename + '.html'
+        if os.path.exists(filename):
+            with open(filename, 'rt') as f:
+                self.explanation = f.read()
+            self.html = True
+        else:
+            try:
+                userpage = self.get_userpage()
+                explanation = self.scrap_explanation(userpage)
+            except:
+                explanation = self.api_response['explanation']
+                self.html = False
+            else:
+                self.save_explanation(explanation)
+                self.html = True
+            self.explanation = explanation
 
-if not os.path.exists('data/' + date + '.json'):
-    api_key = read_config('NASA_API', 'api_key')
-    api_response = ask_api(api_key, date)
-    if 'code' in api_response:
-        if api_response['code'] == 400:
-            print(api_response['msg'])
-    else:
-        save_api_response(api_response)
-        userpage = get_userpage(date) # TO-DO: This line and the following are prone to errors, use a try-catch structure.
-        description = scrap_description(userpage)
-        save_description(date, description)
-else:
-    print('It was already downloaded.')
+# TO-DO: Check if already downloaded first
+#    def download_media():
+#        if self.media_type == 'image':
+#            link = self.api_response['hdurl']
+#            r = requests.get(link)
+#            extension = os.path.splitext(link)[1]
+#            filename = self.filename + extension
+#            with open(filename, 'wb') as f:
+#                for chunk in r.iter_content(chunk_size=128):
+#                    f.write(chunk)
+#        elif self.media_type == 'video':
+#            filename = self.filename + '.%(ext)s'
+#            ydl_opts = {'outtmpl': filename, 'quiet': True}
+#            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
+#                ydl.download([api_response['url']])
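
For the commented-out download_media above to run, it would also need a self parameter, self.api_response in the video branch, and youtube_dl imported somewhere in api.py (the import is not visible in the hunks above, though the removed save_media relied on the module too). A possible completion of that sketch, with a simple already-on-disk check for images to address the TO-DO, could look like:

    # Inside the Apod class in api.py; os.path and requests are already imported there,
    # youtube_dl would have to be as well.
    def download_media(self):
        if self.media_type == 'image':
            link = self.api_response['hdurl']
            extension = os.path.splitext(link)[1]
            filename = self.filename + extension
            if os.path.exists(filename):  # TO-DO above: skip media that is already downloaded
                return
            r = requests.get(link)
            with open(filename, 'wb') as f:
                for chunk in r.iter_content(chunk_size=128):
                    f.write(chunk)
        elif self.media_type == 'video':
            filename = self.filename + '.%(ext)s'
            ydl_opts = {'outtmpl': filename, 'quiet': True}
            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                ydl.download([self.api_response['url']])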