@@ -15,6 +15,7 @@
import configparser
import os.path
+import re
import requests
import youtube_dl
@@ -51,7 +52,32 @@ def save_api_response(api_response):
save_json(filename, api_response)
save_media(filename, api_response)
-date = '2029-08-14'
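+# Convert an ISO date such as '2019-07-10' to the 'yymmdd' form used in APOD page names.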
+def intl_to_short(date):
+ shortdate = date.replace('-', '')
+ shortdate = shortdate[2:]
+ return shortdate
+
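+# Fetch the HTML source of the APOD page for the given date.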
+def get_userpage(date):
+ shortdate = intl_to_short(date)
+ url = 'https://apod.nasa.gov/apod/ap' + shortdate + '.html'
+ payload = {}
+ r = requests.get(url, params = payload)
+ return r.text
+
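+# Pull the explanation text out of the APOD page source and normalize its whitespace.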
+def scrape_description(pagesource):
+ re_description = re.compile("Explanation: </b> (.*?)<p>", flags = re.DOTALL)
+ description = re_description.search(pagesource).group(1)
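+ # The page source sometimes puts a newline right before a closing '>'; drop it so no stray space ends up inside a tag.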
+ description = description.replace('\n>', '>')
+ description = description.replace('\n', ' ')
+ description = re.sub(r'\s+', ' ', description).strip()
+ return description
+
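+# Save the scraped description next to the other files downloaded for that date.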
+def save_description(date, description):
+ filename = 'data/' + date
+ with open(filename + '.html', 'wt', encoding = 'utf-8') as f:
+ f.write(description)
+
+date = '2019-07-10'
if not os.path.exists('data/' + date + '.json'):
api_key = read_api_key()
@@ -61,5 +87,8 @@ if not os.path.exists('data/' + date + '.json'):
print(api_response['msg'])
else:
save_api_response(api_response)
+ userpage = get_userpage(date) # TODO: this line and the following ones can raise exceptions; wrap them in a try/except block.
+ description = scrape_description(userpage)
+ save_description(date, description)
else:
print('It was already downloaded.')