@@ 14,6 14,8 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import configparser
+import datetime
+import json
import os.path
import re
import requests
@@ 24,71 26,109 @@ def read_config(section, key):
config.read('config.cfg')
return config[section][key]
-def ask_api(api_key, date):
- baseurl = 'https://api.nasa.gov/planetary/apod'
- payload = {'api_key': api_key, 'date': date}
- r = requests.get(baseurl, params=payload)
- return r.json()
+def is_invalid(date):
+ try:
+ datetime.datetime.strptime(date, '%Y-%m-%d')
+ except ValueError:
+ return "Incorrect date format, should be YYYY-MM-DD."
+ else:
+ return False
-def save_json(filename, api_response):
- with open(filename + '.json', 'wt') as f:
- f.write(str(api_response))
+class Apod:
-def save_media(filename, api_response):
- if api_response['media_type'] == 'image':
- r = requests.get(api_response['hdurl'])
- filename = filename + '.jpg'
- with open(filename, 'wb') as f:
- for chunk in r.iter_content(chunk_size=128):
- f.write(chunk)
- elif api_response['media_type'] == 'video':
- filename = filename + '.%(ext)s'
- ydl_opts = {'outtmpl': filename, 'quiet': True}
- with youtube_dl.YoutubeDL(ydl_opts) as ydl:
- ydl.download([api_response['url']])
+ def __init__(self, *args):
+ self.api_key = read_config('NASA_API', 'api_key')
+ if args:
+ self.date = args[0]
+ else:
+ self.date = ''
+ self.date = self.ask_api()['date']
+ self.filename = 'data/' + self.date
+ self.error = False
+ self.consult()
+ if not self.error:
+ self.title = self.api_response['title']
+ self.media_type = self.api_response['media_type']
+ if self.media_type == 'image':
+ self.link = self.api_response['hdurl']
+ elif self.media_type == 'video':
+ self.link = self.api_response['url']
+ self.explanation()
-def save_api_response(api_response):
- filename = 'data/' + api_response['date']
- save_json(filename, api_response)
- save_media(filename, api_response)
+ def ask_api(self):
+ baseurl = 'https://api.nasa.gov/planetary/apod'
+ payload = {'api_key': self.api_key, 'date': self.date}
+ r = requests.get(baseurl, params=payload)
+ return r.json()
-def intl_to_short(date):
- shortdate = date.replace('-', '')
- shortdate = shortdate[2:]
- return shortdate
+ def consult(self):
+ if os.path.exists('data/' + self.date + '.json'):
+ with open(self.filename + '.json', 'rt') as f:
+ self.api_response = json.load(f)
+ else:
+ self.api_response = self.ask_api()
+ if 'code' in self.api_response:
+ if self.api_response['code'] == 400:
+ self.error = self.api_response['msg']
+ else:
+                self.error = str(self.api_response['code']) + ': ' + self.api_response['msg']
+ else:
+ with open(self.filename + '.json', 'wt') as f:
+ json.dump(self.api_response, f)
-def get_userpage(date):
- shortdate = intl_to_short(date)
- url = 'https://apod.nasa.gov/apod/ap' + shortdate + '.html'
- payload = {}
- r = requests.get(url, params=payload)
- return r.text
+ def get_userpage(self):
+ shortdate = self.date.replace('-', '')
+ shortdate = shortdate[2:]
+ url = 'https://apod.nasa.gov/apod/ap' + shortdate + '.html'
+ payload = {}
+ r = requests.get(url, params=payload)
+ return r.text
-def scrap_description(pagesource):
- re_description = re.compile("Explanation: </b> (.*?)<p>", flags=re.DOTALL)
- description = re_description.search(pagesource).groups()[0]
- description = description.replace('\n>', '>')
- description = description.replace('\n', ' ')
- description = re.sub('\s+', ' ', description).strip()
- return description
+ def scrap_explanation(self, pagesource):
+ re_explanation = re.compile("Explanation: </b> (.*?)<p>", flags=re.DOTALL) # Compile regex for extracting explanation.
+ explanation = re_explanation.search(pagesource).groups()[0] # Extract explanation.
+ explanation = explanation.replace('/\n', '/') # Fix split URLs along several lines.
+ explanation = explanation.replace('\n>', '>') # Fix split HTML tags.
+ explanation = explanation.replace('<a/>', '</a>') # Fix typos (they seem to write the HTML by hand, yes).
+ explanation = explanation.replace('\n', ' ') # Delete all newlines.
+        explanation = re.sub(r'\s+', ' ', explanation).strip() # Substitute repeated spaces and strips the ones at the beginning and the end of the string.
+ explanation = re.sub(r'<a([^>]*)href=["\'](?!http)([^"\']*)["\']([^>]*)>', r'<a\1href="https://apod.nasa.gov/apod/\2"\3>', explanation) # Change relative paths to absolute.
+ return explanation
-def save_description(date, description):
- filename = 'data/' + date
- with open(filename + '.html', 'wt') as f:
- f.write(description)
+ def save_explanation(self, explanation):
+ with open(self.filename + '.html', 'wt') as f:
+ f.write(explanation)
-date = '2019-07-10'
+ def explanation(self):
+ filename = self.filename + '.html'
+ if os.path.exists(filename):
+ with open(filename, 'rt') as f:
+ self.explanation = f.read()
+ self.html = True
+ else:
+ try:
+ userpage = self.get_userpage()
+ explanation = self.scrap_explanation(userpage)
+            except Exception:
+ explanation = self.api_response['explanation']
+ self.html = False
+ else:
+ self.save_explanation(explanation)
+ self.html = True
+ self.explanation = explanation
-if not os.path.exists('data/' + date + '.json'):
- api_key = read_config('NASA_API', 'api_key')
- api_response = ask_api(api_key, date)
- if 'code' in api_response:
- if api_response['code'] == 400:
- print(api_response['msg'])
- else:
- save_api_response(api_response)
- userpage = get_userpage(date) # TO-DO: This line and the following are prone to errors, use a try-catch structure.
- description = scrap_description(userpage)
- save_description(date, description)
-else:
- print('It was already downloaded.')
+# TO-DO: Check if already downloaded first
+# def download_media():
+# if self.media_type == 'image':
+# link = self.api_response['hdurl']
+# r = requests.get(link)
+# extension = os.path.splitext(link)[1]
+# filename = self.filename + extension
+# with open(filename, 'wb') as f:
+# for chunk in r.iter_content(chunk_size=128):
+# f.write(chunk)
+# elif self.media_type == 'video':
+# filename = self.filename + '.%(ext)s'
+# ydl_opts = {'outtmpl': filename, 'quiet': True}
+# with youtube_dl.YoutubeDL(ydl_opts) as ydl:
+# ydl.download([api_response['url']])