~nhanb/pytaku

34d5390109086a17b0270b6aa3c9ccb35885c621 — Bùi Thành Nhân 1 year, 6 months ago 990ea70 0.4.2
mangasee with the new url fuckery

Mangasee may now use a different chapter link depending on the first
digit of its raw id. Therefore, we now need to use that raw id as our
unique `chapter.id`. This naturally required a migration script to
update existing records too.

Also disable mangadex updates because, you know, it's dead.
M src/mangoapi/mangasee.py => src/mangoapi/mangasee.py +40 -9
@@ 35,7 35,7 @@ class Mangasee(Site):
            numbers = _parse_chapter_number(ch["Chapter"])
            chapters.append(
                {
                    "id": numbers["number"],
                    "id": numbers["raw_id"],
                    "name": ch["ChapterName"],
                    "volume": "",
                    "groups": [],


@@ 55,17 55,20 @@ class Mangasee(Site):
        }

    def get_chapter(self, title_id, chapter_id):
        resp = self.http_get(
            f"https://mangasee123.com/read-online/{title_id}-chapter-{chapter_id}.html"
        )
        numbers = _parse_chapter_number(chapter_id)
        index = chapter_id[0]
        suffix = "" if index == "1" else f"-index-{index}"
        url = f"https://mangasee123.com/read-online/{title_id}-chapter-{numbers['number']}{suffix}.html"
        print(">>", url)
        resp = self.http_get(url)
        html = resp.text

        title_id = regexes["chapter_title_name"].search(html).group(1)
        chapter_data = json.loads(regexes["chapter_data"].search(html).group(1))
        num_pages = int(chapter_data["Page"])
        directory = chapter_data["Directory"]
        img_server = regexes["chapter_img_server"].search(html).group(1)
        img_server = regexes["chapter_img_server"].search(html).group(1)

        numbers = _parse_chapter_number(chapter_data["Chapter"])

        result = {
            "id": chapter_id,


@@ 73,7 76,9 @@ class Mangasee(Site):
            "site": "mangasee",
            "name": chapter_data["ChapterName"] or "",
            "pages": [
                _generate_img_src(img_server, title_id, chapter_data["Chapter"], p)
                _generate_img_src(
                    img_server, title_id, chapter_data["Chapter"], directory, p
                )
                for p in range(1, num_pages + 1)
            ],
            "pages_alt": [],


@@ 180,13 185,35 @@ def _parse_chapter_number(e):
    result = {
        "num_major": major,
        "number": str(major) if not minor else f"{major}.{minor}",
        "raw_id": e,
    }
    if minor:
        result["num_minor"] = minor
    return result


def _generate_img_src(img_srv, title_id, chapter_id, page):
def _chapter_url(e):
    """
    Reference notes on how a raw chapter id maps to a chapter URL.

    This function contains no statements besides this docstring, so calling
    it simply returns None. It records the following JavaScript
    (NOTE(review): presumably copied from Mangasee's frontend - confirm):

        (vm.ChapterURLEncode = function (e) {
            Index = "";
            var t = e.substring(0, 1);
            1 != t && (Index = "-index-" + t);
            var n = parseInt(e.slice(1, -1)),
            m = "",
            a = e[e.length - 1];
            return (
            0 != a && (m = "." + a),
            "-chapter-" + n + m + Index + vm.PageOne + ".html"
            );
        }),

    e.g. vm.ChapterURLEncode("201420") === "-chapter-142-index-2-page-1.html"

    Reading the snippet: the first character of `e` becomes an "-index-N"
    suffix unless it equals 1, the characters between the first and the last
    are parsed as an integer chapter number, and a non-zero last character
    is appended to that number as ".N".
    """


def _generate_img_src(img_srv, title_id, chapter_id, directory, page):
    """
    Chapter ID padding logic:



@@ 206,4 233,8 @@ def _generate_img_src(img_srv, title_id, chapter_id, page):
        padded_chapter = chapter
    else:
        padded_chapter = f"{chapter}.{odd}"
    return f"https://{img_srv}/manga/{title_id}/{padded_chapter}-{page:03d}.png"

    directory = f"{directory}/" if directory else ""
    return (
        f"https://{img_srv}/manga/{title_id}/{directory}{padded_chapter}-{page:03d}.png"
    )

M src/pytaku/main.py => src/pytaku/main.py +1 -0
@@ 98,6 98,7 @@ def proxy_view(b64_url):
        - be a polite netizen in general
    """
    url = _decode_proxy_url(b64_url)
    print("Proxying url:", url)
    if not _is_manga_img_url(url):
        print("Invalid img url:", url)
        return "Nope", 400

M src/pytaku/scheduler.py => src/pytaku/scheduler.py +4 -0
@@ 65,6 65,10 @@ class UpdateOutdatedTitles(Worker):

    def run(self):
        for title in find_outdated_titles():
            if title["site"] == "mangadex":
                print(f"Skipped title {title['id']} from {title['site']}.")
                continue

            print(f"Updating title {title['id']} from {title['site']}...", end="")
            try:
                updated_title = get_title(title["site"], title["id"])

A src/pytaku/scripts/__init__.py => src/pytaku/scripts/__init__.py +0 -0
A src/pytaku/scripts/migrate_mangasee_chapter_ids.py => src/pytaku/scripts/migrate_mangasee_chapter_ids.py +76 -0
@@ 0,0 1,76 @@
import subprocess
from typing import List, Tuple

from mangoapi.mangasee import Mangasee
from pytaku.database.common import get_conn, run_sql
from pytaku.persistence import save_title

# Module-level Mangasee client; fetch_title() calls its get_title() method.
ms = Mangasee()


def fetch_title(title_id: str) -> Tuple[dict, List[Tuple[str, str, str]]]:
    """
    Fetch a title via the module-level Mangasee client and list its chapter
    id changes.

    Returns a 2-tuple `(title, updates)`:

    - `title` is whatever `ms.get_title(title_id)` returned.
    - `updates` holds one `(title_id, old_id, new_id)` tuple per entry in
      `title["chapters"]`, where `old_id` is the chapter's "number" value
      and `new_id` is its "id" value.
    """
    title = ms.get_title(title_id)
    updates = [(title_id, ch["number"], ch["id"]) for ch in title["chapters"]]
    return title, updates


def migrate():
    """
    Re-key stored Mangasee chapters from their old ids to their new ids.

    Steps:

    1. Select the ids of all titles whose site is 'mangasee'.
    2. Fetch each title with `fetch_title`, collecting the fetched titles
       and their `(title_id, old_chapter_id, new_chapter_id)` diffs.
    3. Inside one transaction, with foreign key enforcement switched off:
       save every fetched title, then rewrite the matching ids in the
       `chapter` and `read` tables.
    4. Run `pragma foreign_key_check`; commit only if it reports nothing.

    Raises:
        RuntimeError: if the foreign key check reports any violation. The
            transaction is rolled back first.

    Any other exception raised while the transaction is open also triggers
    a rollback before propagating. Foreign key enforcement is switched back
    on in every case.
    """
    mangasee_titles = run_sql(
        "SELECT id FROM title WHERE site = 'mangasee' ORDER BY lower(id);"
    )
    print(f"There are {len(mangasee_titles)} titles to update.")

    # All fetching happens before the first write, so a failed fetch aborts
    # the migration without having touched the database.
    diffs = []
    new_titles = []
    for title_id in mangasee_titles:
        print(f">> Fetching {title_id}")
        new_title, new_title_diffs = fetch_title(title_id)
        diffs += new_title_diffs
        new_titles.append(new_title)

    print("Diffs:")
    for diff in diffs:
        print(diff)

    print("Starting db transaction")
    conn = get_conn()
    cursor = conn.cursor()
    # Issued before "begin" on purpose: the ids are rewritten table by table,
    # so references are temporarily inconsistent until both updates are done.
    cursor.execute("pragma foreign_keys = off;")
    try:
        cursor.execute("begin transaction;")
        try:
            for new_title in new_titles:
                print(f'Saving title {new_title["id"]}')
                save_title(new_title)

            for title_id, old_chapter_id, new_chapter_id in diffs:
                print("Updating", title_id, old_chapter_id, "to", new_chapter_id)
                cursor.execute(
                    "UPDATE chapter SET id=? WHERE id=? AND title_id=? AND site='mangasee';",
                    (new_chapter_id, old_chapter_id, title_id),
                )
                cursor.execute(
                    "UPDATE read SET chapter_id=? WHERE chapter_id=? AND title_id=? AND site='mangasee';",
                    (new_chapter_id, old_chapter_id, title_id),
                )

            # The check reports problems as result rows, not as an error, so
            # the rows must actually be read for the check to mean anything.
            violations = list(cursor.execute("pragma foreign_key_check;"))
            if violations:
                raise RuntimeError(
                    f"Foreign key check failed with {len(violations)} "
                    f"violation(s), first few: {violations[:10]}"
                )
        except Exception:
            # Leave the database exactly as it was before the migration.
            cursor.execute("rollback;")
            raise
        cursor.execute("commit;")
    finally:
        cursor.execute("pragma foreign_keys = on;")
    print("All done!")


def main():
    """
    Stop the pytaku user services, run the migration, then start them again.

    Every systemctl call uses check=True, so a failing call raises and
    aborts the remaining steps.
    """
    services = ("pytaku", "pytaku-scheduler")

    for service in services:
        subprocess.run(["systemctl", "--user", "stop", service], check=True)

    migrate()

    for service in services:
        subprocess.run(["systemctl", "--user", "start", service], check=True)


# Run the migration only when this file is executed as a script.
if __name__ == "__main__":
    main()