M download_course/download_course.py => download_course/download_course.py +11 -8
@@ 25,6 25,7 @@ from download_course.utils.config_json import (
init_config,
update_config,
update_etag,
+ STORAGE_FILE
)
from download_course.utils.selectors import (
REPLACE_MESelectors,
@@ 292,31 293,32 @@ class REPLACE_MEPage():
) -> None:
"""Download files if they are missing or outdated."""
init_config()
- file_downloaded = check_file_downloaded(file_name)
- is_etag_same = check_file_etag(etag)
+ document_path = section_dir / file_name
+ file_downloaded = check_file_downloaded(document_path)
+ is_etag_same = check_file_etag(document_path, etag)
if not file_downloaded:
logging.info(
"The file isn't in the json, downloading %s",
file_name
)
- file_metadata = {file_name: etag}
+ file_metadata = {str(document_path): etag}
update_config(file_metadata)
- document_dir = section_dir / file_name
document_dl = self.session.get(
document_link_head.headers["Location"]
)
- document_dir.write_bytes(document_dl.content)
+ document_path.write_bytes(document_dl.content)
+ self.files_downloaded += 1
elif not is_etag_same:
logging.info(
"The etags are different! Downloading %s",
file_name
)
- update_etag(file_name, etag)
- document_dir = section_dir / file_name
+ update_etag(document_path, etag)
document_dl = self.session.get(
document_link_head.headers["Location"]
)
- document_dir.write_bytes(document_dl.content)
+ document_path.write_bytes(document_dl.content)
+ self.files_downloaded += 1
else:
logging.debug(
"The file %s has already been downloaded!",
@@ 401,6 403,7 @@ def download_courses(
args.output_dir,
args.promotion
)
+ logging.info("The config file is located at %s", STORAGE_FILE)
logging.info(
"The number of files downloaded is : %s",
REPLACE_ME_page.files_downloaded
M download_course/utils/config_json.py => download_course/utils/config_json.py +2 -4
@@ 3,7 3,6 @@
"""Regroup all the routines pertaining to the download of courses."""
import sys
-import logging
import pathlib
import json
from typing import Dict, Any
@@ 34,7 33,6 @@ elif sys.platform == "darwin":
def init_config() -> None:
"""Init the json if it doesn't exist"""
- logging.info("The config file is located at %s", STORAGE_FILE)
if not STORAGE_FILE.exists():
STORAGE_FILE.write_text(
json.dumps({}, ensure_ascii=False, indent=4),
@@ 47,10 45,10 @@ def read_config() -> Any:
return json.loads(STORAGE_FILE.read_text(encoding="utf-8"))
-def update_etag(file_name: str, etag: str) -> None:
+def update_etag(file_path: pathlib.Path, etag: str) -> None:
"""Update etags in the json file"""
data = json.loads(STORAGE_FILE.read_text(encoding="utf-8"))
- data[file_name] = etag
+ data[str(file_path)] = etag
STORAGE_FILE.write_text(
json.dumps(data, ensure_ascii=False, indent=4),
M download_course/utils/utils.py => download_course/utils/utils.py +8 -5
@@ 36,21 36,24 @@ def slugify(
return re.sub(r'[-\s]+', '-', string).strip('-_')
-def check_file_downloaded(file_name: str) -> bool:
+def check_file_downloaded(file_path: pathlib.Path) -> bool:
"""Check if the file has already been downloaded."""
file_downloaded = False
config = read_config()
- if file_name in config:
+ if str(file_path) in config:
file_downloaded = True
return file_downloaded
-def check_file_etag(etag: str) -> bool:
+def check_file_etag(file_path: pathlib.Path, etag: str) -> bool:
"""Check if the file is up to date."""
is_same_etag = False
config = read_config()
- if etag in config.values():
- is_same_etag = True
+ try:
+ if config[str(file_path)] == etag:
+ is_same_etag = True
+ except KeyError:
+ pass
return is_same_etag