~fixato/gemini-tools

d14a7fc368ad2e4777b2ea8ea0bb2ad5a03aab63 — Filip H.F. "FiXato" Slagter 5 months ago 1ae6cca
Various improvements to the asciidoc-to-gemini converter

Fix includedir issues by allowing it to be passed as keyword arg to the converter
Fix datetime parsing by removing the comma separating the date and time.
Added *_formatted_gemini to datetime metadata which is formatted as YYYY-MM-DD (%Y-%m-%d)
Strip an extra layer of file extensions from new target asciidoc filename (work-around for .hashless.adoc)
Fix for stripping escaped e-mail addresses. (work-around to get mastodon usernames working)
Added 'First published on' date line to output gemtext
Fixed issues with sorting index files where pubdate is not set. (Might want to re-enable the print output again)
Use (%Y-%m-%d) format for index listing link titles
Disable listing hidden articles again
Added a requirements.txt (probably missing a couple of dependencies still though...)
3 files changed, 30 insertions(+), 11 deletions(-)

M converter.py
M lib/asciidoc_to_gemini_converter.py
A requirements.txt
M converter.py => converter.py +2 -2
@@ 2,7 2,7 @@ from lib.asciidoc_to_gemini_converter import AdocToGeminiConverter
from sys import argv
from pathlib import Path
from pprint import pprint as pp
converter = AdocToGeminiConverter(app_name = 'fixato_gemlog_converter')
converter = AdocToGeminiConverter(app_name = 'fixato_gemlog_converter', includedir=f"""{Path(__file__).parent.parent.resolve()}/""")

for file in argv[1:]:
    print(f"""Converting: {file}""")


@@ 21,4 21,4 @@ for file in argv[1:]:
    for (doctype, path) in documents.items():
        print(f"""{doctype}: {path}\n""")

converter.index_from_converted(index_template=Path('index_post.template.gmi'))
converter.index_from_converted(index_template=Path(__file__).parent.joinpath('index_post.template.gmi'))

M lib/asciidoc_to_gemini_converter.py => lib/asciidoc_to_gemini_converter.py +26 -9
@@ 41,6 41,7 @@ class AdocToGeminiConverter:
        self.current_cached_gemini = None
        self.current_metadata = None
        self.converted = {}
        self.includedir = kwargs.get('includedir') if kwargs.get('includedir') else '.'

        for filepath in [self.cache_home]:
            filepath.mkdir(parents=True, exist_ok=True)


@@ 69,7 70,7 @@ class AdocToGeminiConverter:

    def formatted_datetime(self, datetime_string, target_timezone=None, format_string=None):
        """Parse *datetime_string* and render it through ``strftime``.

        Args:
            datetime_string: Value handed as-is to ``self.parse_datetime``.
            target_timezone: Passed through to ``self.parse_datetime``.
            format_string: ``strftime`` pattern. When falsy (``None`` or an
                empty string) the long default ``"%A %d %B %Y %X %Z"`` is used.

        Returns:
            The result of calling ``strftime(format_string)`` on whatever
            ``self.parse_datetime`` returns.
        """
        if not format_string:
            # The date and time parts are intentionally not comma-separated.
            format_string = "%A %d %B %Y %X %Z"
        parsed = self.parse_datetime(
            datetime_string=datetime_string,
            target_timezone=target_timezone,
        )
        return parsed.strftime(format_string)

    def add_to_converted(self):


@@ 125,6 126,7 @@ class AdocToGeminiConverter:
        for k,v in metadata.copy().items():
            if isinstance(v, datetime):
                metadata[f"""{k}_formatted"""] = self.formatted_datetime(v)
                metadata[f"""{k}_formatted_gemini"""] = self.formatted_datetime(v, format_string="%Y-%m-%d")
        self.current_metadata = metadata




@@ 141,7 143,7 @@ class AdocToGeminiConverter:

    def load_asciidoc(self, filepath):
        new_filename=slugify(
            str(filepath.with_suffix('')),
            str(filepath.with_suffix('').with_suffix('')),
            max_length=self.TITLE_SLUG_MAX_LENGTH,
            regex_pattern=self.RE_TITLE_SLUG_ALLOWED_CHARACTERS,
            replacements=self.TITLE_SLUG_REPLACEMENTS


@@ 178,7 180,9 @@ class AdocToGeminiConverter:
        new_contents = self.insert_markdown_title_from_asciidoc_frontmatter(
            self.increase_markdown_headers(
                self.fix_markdown_lists(
                    self.fix_markdown_footnotes(contents)
                    self.fix_markdown_footnotes(
                      self.fix_markdown_escaped_mailaddress(contents)
                    )
                )
            )
        )


@@ 195,6 199,7 @@ class AdocToGeminiConverter:

    def add_header_to_gemini(self, text):
        header = []
        header.append(self.published_line())
        header.append(self.revision_line())
        header.append(self.journal_line())
        header_text = '\n'.join([line for line in header if line])


@@ 230,6 235,9 @@ class AdocToGeminiConverter:
        new_text = re.sub(r'(.*)\[([0-9]{1,})\](.*)', self.superscript_match, text)
        return re.sub(r'^(footnote¹:)', r'# Footnotes:\n\1', new_text, flags=re.MULTILINE)

    def fix_markdown_escaped_mailaddress(self, text):
        r"""Turn a doubly escaped at-sign (``\\@``) into a singly escaped one (``\@``).

        Args:
            text: Markdown text to clean up.

        Returns:
            A copy of *text* in which every backslash-backslash-``@`` sequence
            has been replaced by backslash-``@``. Other text is left untouched.
        """
        # Every backslash is written out explicitly. The earlier spelling
        # '\\\@' produced the same three characters only by leaning on the
        # invalid escape sequence '\@', which current Python versions warn
        # about at compile time.
        return text.replace('\\\\@', '\\@')

    def fix_markdown_lists(self, text):
        """Remove the blank line that separates consecutive Markdown list items.

        A list item is a line starting with ``-`` or ``<digits>.``. It may be
        followed by continuation lines indented with at least four whitespace
        characters. When such an item is followed by an empty line and then by
        another list marker with two trailing spaces, one of the two newlines
        is dropped so the items become adjacent.

        Args:
            text: Markdown text to tidy.

        Returns:
            *text* with those separating blank lines removed. All continuation
            lines of an item are preserved.
        """
        pattern = (
            r'^((?:-|[0-9]+\.).+)$'      # the list item's first line
            # The whole run of continuation lines is captured as ONE group.
            # A repeated capture group would keep only its last iteration and
            # silently drop the earlier continuation lines on substitution.
            r'((?:\n\s{4,}[^\n]+)*)'
            r'\n(?=\n(?:-|[0-9]+\.)  )'  # newline before a blank line + next marker
        )
        return re.sub(pattern, r'\1\2', text, flags=re.MULTILINE)



@@ 240,6 248,7 @@ class AdocToGeminiConverter:
            "asciidoctor",
            "-a", "ext=gmi",
            "-a", "baseurl=",
            "-a", f"""includedir={self.includedir}""",
            "-b", "docbook",
            str(self.current_cached_asciidoc),
            "-o", str(self.current_cached_docbook)


@@ 265,14 274,17 @@ class AdocToGeminiConverter:

    def index_from_converted(self, index_template, order_callback=None):
        if not order_callback:
            order_callback=partial(sorted, key=lambda item: item.get('metadata').get('attributes').get('pubdate'), reverse=True)
        print('\n'.join([' - '.join([doc.get('metadata').get('last_revision_formatted'), doc.get('metadata').get('title')]) for doc in order_callback(self.converted.values())]))
        print('\n'.join([' - '.join([doc.get('metadata').get('published_formatted'), doc.get('metadata').get('title')]) for doc in order_callback(self.converted.values())]))
            order_callback=partial(sorted,
              key=lambda item: item.get('metadata').get('published') if item.get('metadata').get('published') else item.get('metadata').get('ctime'),
              reverse=True)
        #print('\n'.join([' - '.join([doc.get('metadata').get('last_revision_formatted'), doc.get('metadata').get('title')]) for doc in order_callback(self.converted.values())]))
        #print('\n'.join([' - '.join([doc.get('metadata').get('published_formatted'), doc.get('metadata').get('title')]) for doc in order_callback(self.converted.values())]))
        index_page_template = index_template.read_text()
        ordered_docs = order_callback(self.converted.values())
        links_list = '\n'.join(
            [
                f"""=> {doc.get('gemini').name} {doc.get('metadata').get('published_formatted')} - {doc.get('metadata').get('title')}""" \
                for doc in order_callback(self.converted.values()) if doc.get('metadata').get('attributes').get('no-index') != 'true' and  doc.get('metadata').get('attributes').get('page-published') == 'true'
                f"""=> {doc.get('gemini').name} {doc.get('metadata').get('published_formatted_gemini')} - {doc.get('metadata').get('title')}""" \
                for doc in ordered_docs if (doc.get('metadata').get('attributes').get('no-index') == None or doc.get('metadata').get('attributes').get('no-index') != 'true') and (doc.get('metadata').get('attributes').get('page-published') == None or doc.get('metadata').get('attributes').get('page-published') == 'true')
            ]
        )
        index_page = index_page_template.replace('{POSTS}', links_list)


@@ 282,7 294,7 @@ class AdocToGeminiConverter:
                for doc in order_callback(self.converted.values()) if not (doc.get('metadata').get('attributes').get('no-index') != 'true' and  doc.get('metadata').get('attributes').get('page-published') == 'true')
            ]
        )
        index_page = index_page.replace('{HIDDEN_POSTS}', hidden_links_list)
        #index_page = index_page.replace('{HIDDEN_POSTS}', hidden_links_list)
        self.cache_home.joinpath('index').with_suffix('.gmi').write_text(index_page)




@@ 291,6 303,11 @@ class AdocToGeminiConverter:
            return f"""Last revised on {self.current_metadata.get('last_revision_formatted')}"""
        return None

    def published_line(self):
        """Build the 'First published on ...' header line for the current document.

        Returns:
            The header line when ``self.current_metadata`` holds a truthy
            ``published_formatted`` value, otherwise ``None``.
        """
        published = self.current_metadata.get('published_formatted')
        return f"""First published on {published}""" if published else None

    def journal_line(self):
        if self.current_metadata.get('journal_date_formatted'):
            return f"""Part of journal entry for {self.current_metadata.get('journal_date_formatted')}"""

A requirements.txt => requirements.txt +2 -0
@@ 0,0 1,2 @@
tzlocal
maya