~fixato/gemini-tools

1ae6ccafc685264df0a41926fdcfba0640a875a5 — Filip H.F. "FiXato" Slagter 5 months ago 9f84048
Added headers and index list

Work-in-progress commit of adding last revision date and journal date reference to the header of gemini files, and parsing an index template and inserting published public posts.

(Currently also includes non-public and non-indexed posts for debugging purposes.)

Converting dates with Maya, and formatting them in the system's local timezone.
2 files changed, 87 insertions(+), 2 deletions(-)

M converter.py
M lib/asciidoc_to_gemini_converter.py
M converter.py => converter.py +2 -0
@@ 20,3 20,5 @@ for file in argv[1:]:
    }
    for (doctype, path) in documents.items():
        print(f"""{doctype}: {path}\n""")

converter.index_from_converted(index_template=Path('index_post.template.gmi'))

M lib/asciidoc_to_gemini_converter.py => lib/asciidoc_to_gemini_converter.py +85 -2
@@ 15,6 15,15 @@ from slugify import slugify
import subprocess
from md2gemini import md2gemini
import string
from datetime import datetime
import pytz
from pytz import reference
import tzlocal
from dateutil.parser import parse as parse_datetime
from functools import partial
import maya
# Local timezone object; its truthiness decides whether the local zone name
# or 'UTC' is used as the default conversion target in parse_datetime().
TZ = reference.LocalTimezone()
# Name of the system's local zone. pytz zone objects expose the name as
# `.zone`; the zoneinfo objects returned by newer tzlocal releases expose it
# as `.key` instead, so try both before falling back to str().
_LOCAL_ZONE = tzlocal.get_localzone()
TZ_NAME = getattr(_LOCAL_ZONE, 'zone', None) or getattr(_LOCAL_ZONE, 'key', None) or str(_LOCAL_ZONE)

class AdocToGeminiConverter:
    def __init__(self, app_name=None, **kwargs):


@@ 31,6 40,7 @@ class AdocToGeminiConverter:
        self.current_cached_markdown = None
        self.current_cached_gemini = None
        self.current_metadata = None
        self.converted = {}

        for filepath in [self.cache_home]:
            filepath.mkdir(parents=True, exist_ok=True)


@@ 53,8 63,30 @@ class AdocToGeminiConverter:
            ''.join(self.superscript_map.values())
        )

    def parse_datetime(self, datetime_string, target_timezone=None):
        """Parse a date/time value and return it converted to a timezone.

        Args:
            datetime_string: Value handed to ``maya.parse``.
            target_timezone: Name of the timezone to convert to. When omitted,
                the local zone name (``TZ_NAME``) is used if a local timezone
                object (``TZ``) is available, otherwise ``'UTC'``.

        Returns:
            The result of ``maya.parse(...).datetime(to_timezone=...)``.
        """
        # Only fall back to a default when the caller did not pick a zone.
        # The previous one-line conditional replaced any explicitly passed
        # target_timezone with 'UTC'.
        if not target_timezone:
            target_timezone = TZ_NAME if TZ else 'UTC'
        return maya.parse(datetime_string).datetime(to_timezone=target_timezone)

    def formatted_datetime(self, datetime_string, target_timezone=None, format_string=None):
        """Parse a date/time value and render it as text via ``strftime``.

        Args:
            datetime_string: Value forwarded to ``parse_datetime``.
            target_timezone: Timezone name forwarded to ``parse_datetime``.
            format_string: ``strftime`` pattern; a falsy value selects the
                default ``"%A %d %B %Y, %X %Z"``.

        Returns:
            The formatted date/time string.
        """
        pattern = format_string if format_string else "%A %d %B %Y, %X %Z"
        moment = self.parse_datetime(
            datetime_string=datetime_string,
            target_timezone=target_timezone,
        )
        return moment.strftime(pattern)

    def add_to_converted(self):
        self.converted[str(self.current_source)] = {
            'asciidoc': self.current_cached_asciidoc,
            'docbook': self.current_cached_docbook,
            'markdown': self.current_cached_markdown,
            'gemini': self.current_cached_gemini,
            'metadata': self.current_metadata,
        }

    def set_metadata_from_asciidoc(self):
        metadata = {'attributes': {}}
        metadata = {
          'attributes': {},
          'ctime': self.parse_datetime(datetime.fromtimestamp(self.current_source.stat().st_ctime, tz=TZ).astimezone(pytz.utc).isoformat()), #This is likely the same as the last modified time, since not all file systems register a creation timestamp.
          'mtime': self.parse_datetime(datetime.fromtimestamp(self.current_source.stat().st_mtime, tz=TZ).astimezone(pytz.utc).isoformat())
        }
        line_no = 0
        with self.current_cached_asciidoc.open() as f:
            for line in f:


@@ 87,6 119,12 @@ class AdocToGeminiConverter:
                        raise Exception(f"""Malformed asciidoc attribute line: {line}""")
                    metadata['attributes'][line_match.groups()[0]] = line_match.groups()[1]

        metadata['last_revision'] = self.parse_datetime(metadata['attributes'].get('revdate') if metadata['attributes'].get('revdate') else metadata.get('mtime'))
        metadata['published'] = self.parse_datetime(metadata['attributes'].get('pubdate') if metadata['attributes'].get('pubdate') else metadata.get('ctime'))
        metadata['journal_date'] = self.parse_datetime(metadata['attributes'].get('journal-date')) if metadata['attributes'].get('journal-date') else None
        for k,v in metadata.copy().items():
            if isinstance(v, datetime):
                metadata[f"""{k}_formatted"""] = self.formatted_datetime(v)
        self.current_metadata = metadata




@@ 120,6 158,7 @@ class AdocToGeminiConverter:
        self.postprocess_markdown()
        self.convert_markdown_to_gemini()
        self.postprocess_gemini()
        self.add_to_converted()

    def preprocess_asciidoc(self):
        contents = self.current_cached_asciidoc.read_text()


@@ 147,9 186,20 @@ class AdocToGeminiConverter:

    def postprocess_gemini(self):
        """Post-process the cached gemini file in place.

        Reads the cached gemini text, passes it through
        ``move_code_block_title_to_alt``, ``add_header_to_gemini`` and
        ``condense_newlines`` (in that order) and writes the result back to
        the same file.
        """
        contents = self.current_cached_gemini.read_text()
        # Run the pipeline exactly once; an earlier header-less result used to
        # be computed here and was immediately overwritten.
        retitled = self.move_code_block_title_to_alt(contents)
        new_contents = self.condense_newlines(self.add_header_to_gemini(retitled))
        self.current_cached_gemini.write_text(new_contents)

    def add_header_to_gemini(self, text):
        header = []
        header.append(self.revision_line())
        header.append(self.journal_line())
        header_text = '\n'.join([line for line in header if line])
        return text.replace('\n', f"""\n{header_text}\n""", 1)

    def strip_hidden_gemini_links(self, text):
        """Replace links marked ``gemini=hidden`` by their link text.

        Matches ``scheme://target[text,gemini=hidden,...]`` and substitutes
        the captured link text (quotes, if any, are kept as written). Links
        without the ``gemini=hidden`` attribute are left untouched.

        Args:
            text: Source text to scan.

        Returns:
            ``text`` with every hidden link reduced to its link text.
        """
        pattern = (
            r'([a-zA-Z]+://[^\[\s]+)'            # group 1: the URL
            r'\[("[^"]+"|\'[^\']+\'|[^]]+)'      # group 2: the link text
            r',gemini=hidden'
            # Any further attributes: key="v", key='v' or key=v. The previous
            # pattern had a stray empty alternative and only accepted
            # double-quoted values, so other forms prevented a match. A bare
            # trailing comma is still tolerated, as before.
            r'(?:,(?:[a-zA-Z0-9_-]+=(?:"[^"]+"|\'[^\']+\'|[^],]+))?)*'
            r'\]'
        )
        return re.sub(pattern, r'\2', text)



@@ 212,3 262,36 @@ class AdocToGeminiConverter:
        self.current_cached_gemini.touch()
        gemini = md2gemini(self.current_cached_markdown.read_text(), links='copy')
        self.current_cached_gemini.write_text(gemini)

    def index_from_converted(self, index_template, order_callback=None):
        if not order_callback:
            order_callback=partial(sorted, key=lambda item: item.get('metadata').get('attributes').get('pubdate'), reverse=True)
        print('\n'.join([' - '.join([doc.get('metadata').get('last_revision_formatted'), doc.get('metadata').get('title')]) for doc in order_callback(self.converted.values())]))
        print('\n'.join([' - '.join([doc.get('metadata').get('published_formatted'), doc.get('metadata').get('title')]) for doc in order_callback(self.converted.values())]))
        index_page_template = index_template.read_text()
        links_list = '\n'.join(
            [
                f"""=> {doc.get('gemini').name} {doc.get('metadata').get('published_formatted')} - {doc.get('metadata').get('title')}""" \
                for doc in order_callback(self.converted.values()) if doc.get('metadata').get('attributes').get('no-index') != 'true' and  doc.get('metadata').get('attributes').get('page-published') == 'true'
            ]
        )
        index_page = index_page_template.replace('{POSTS}', links_list)
        hidden_links_list = '\n'.join(
            [
                f"""=> {doc.get('gemini').name} {doc.get('metadata').get('published_formatted')} - {doc.get('metadata').get('title')}""" \
                for doc in order_callback(self.converted.values()) if not (doc.get('metadata').get('attributes').get('no-index') != 'true' and  doc.get('metadata').get('attributes').get('page-published') == 'true')
            ]
        )
        index_page = index_page.replace('{HIDDEN_POSTS}', hidden_links_list)
        self.cache_home.joinpath('index').with_suffix('.gmi').write_text(index_page)


    def revision_line(self):
        if self.current_metadata.get('last_revision_formatted'):
            return f"""Last revised on {self.current_metadata.get('last_revision_formatted')}"""
        return None

    def journal_line(self):
        if self.current_metadata.get('journal_date_formatted'):
            return f"""Part of journal entry for {self.current_metadata.get('journal_date_formatted')}"""
        return None