~josealberto4444/apodnasabot

0fda77d12828faa428ac39cc94fa1edb04a79730 — José Alberto Orejuela García 5 years ago 44dc26b
Scrape description with links from web page

The API does not give the description with the links, so I had to write a
scraping tool to get it from the main page.
1 files changed, 30 insertions(+), 1 deletions(-)

M api.py
M api.py => api.py +30 -1
@@ 15,6 15,7 @@

import configparser
import os.path
import re
import requests
import youtube_dl



@@ 51,7 52,32 @@ def save_api_response(api_response):
    save_json(filename, api_response)
    save_media(filename, api_response)

# NOTE(review): `date` is assigned again ('2019-07-10') after the helper
# definitions below and before any visible code reads it, so this value
# appears to be unused -- confirm whether this line should remain.
date = '2029-08-14'
def intl_to_short(date):
    """Turn a dashed date such as '2019-07-10' into the short form '190710'.

    All '-' characters are removed and the first two characters of the
    result (the century digits for a 'YYYY-MM-DD' input) are dropped.
    """
    digits = ''.join(date.split('-'))
    return digits[2:]

def get_userpage(date, timeout=30):
    """Download the HTML source of the APOD web page for a given date.

    Args:
        date: Date string in 'YYYY-MM-DD' form; it is shortened with
            intl_to_short() to build the page URL.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout, requests may block indefinitely.

    Returns:
        The response body decoded as text.

    Raises:
        requests.exceptions.RequestException: If the connection fails,
            the request times out, or the server answers with an HTTP
            error status (for example 404 when no page exists for date).
    """
    shortdate = intl_to_short(date)
    url = 'https://apod.nasa.gov/apod/ap' + shortdate + '.html'
    r = requests.get(url, timeout=timeout)
    # Fail here rather than handing an error page to the scraper.
    r.raise_for_status()
    return r.text

def scrap_description(pagesource):
    """Extract the explanation text (HTML links included) from a page.

    The description is the text between the 'Explanation: </b> ' marker
    and the next '<p>' tag. Line breaks that fall inside the markup are
    removed and every run of whitespace is collapsed to a single space.

    Args:
        pagesource: HTML source of the page as a string.

    Returns:
        The description as a single-line string with no leading or
        trailing whitespace.

    Raises:
        ValueError: If the explanation marker is not found in pagesource.
    """
    match = re.search(r"Explanation: </b> (.*?)<p>", pagesource,
                      flags=re.DOTALL)
    if match is None:
        raise ValueError('Explanation section not found in page source.')
    description = match.group(1)
    # A tag whose closing '>' was wrapped onto the next line is re-joined
    # first, so no space ends up between the tag contents and the '>'.
    description = description.replace('\n>', '>')
    # Remaining newlines are whitespace, so this one pass also turns them
    # into single spaces.
    return re.sub(r'\s+', ' ', description).strip()

def save_description(date, description):
    """Write the description to 'data/<date>.html'.

    Args:
        date: Date string used as the file name (without extension).
        description: Text to store in the file.

    The file is written as UTF-8 explicitly, so text containing non-ASCII
    characters is stored the same way regardless of the platform's default
    encoding. The 'data/' directory is not created here.
    """
    filename = 'data/' + date
    with open(filename + '.html', 'wt', encoding='utf-8') as f:
        f.write(description)

date = '2019-07-10'

if not os.path.exists('data/' + date + '.json'):
    api_key = read_api_key()


@@ 61,5 87,8 @@ if not os.path.exists('data/' + date + '.json'):
            print(api_response['msg'])
    else:
        save_api_response(api_response)
        userpage = get_userpage(date) # TO-DO: This line and the following are prone to errors, use a try-catch structure.
        description = scrap_description(userpage)
        save_description(date, description)
else:
    print('It was already downloaded.')