~albertlarsan68/gem2html

635fc5ca6abe9fb355799228bccd47402ac92fc5 — Albert Larsan 8 months ago 92197c0
Add code

Signed-off-by: Albert Larsan <albertlarsan@unbon.cafe>
2 files changed, 299 insertions(+), 4 deletions(-)

M README.md
M src/main.rs
M README.md => README.md +7 -3
@@ 8,17 8,21 @@ Generate HTML files from Gemtext files
2. Run `cargo run`, it will convert all files in the input directory to the output one.
3. Enjoy!

## WARNING!

This tool does not sanitize the HTML in any way, so do not convert untrusted inputs, or the output may be totally invalid.

## Templating

In oreder to get a good experience with both the HTML and Gemtext output, there is a small templating system.
The usage is as follows:
In order to get a good experience with both the HTML and Gemtext output, there is a small templating system.  
The usage is as follows, for now only in the URL of the links:

- `{{{}}` (three opening and two closing brackets) is replaced by `{`
- `{{}}}` (two opening and three closing brackets) is replaced by `}`
- `{{proto}}` is replaced by the protocol of the file being generated (`https` for HTML, and `gemini` for Gemtext)
- `{{ext}}` is replaced by the extension of the file being generated (`html` for HTML, and `gmi` for Gemtext)

No replacement is done in code blocks
No replacement will ever be done in code blocks

## License


M src/main.rs => src/main.rs +292 -1
@@ 1,3 1,294 @@
use std::{
    collections::VecDeque,
    fs::File,
    io::{self, BufRead, BufReader, Read, Seek, Write as _},
    path::Path,
};

/// Entry point: converts the Gemtext file named by the first command-line
/// argument to HTML and writes the result to standard output.
///
/// Nothing is done when no argument is supplied. If the file cannot be opened
/// or the conversion/copy fails with an I/O error, a message is printed to
/// standard error and the process exits with status 1.
fn main() {
    let arg = match std::env::args_os().nth(1) {
        Some(arg) => arg,
        None => return,
    };

    // Nothing but the generated document may be written to stdout: any extra
    // greeting/banner would end up in front of `<!DOCTYPE html>`.
    let mut html = match convert_file(&arg) {
        Ok(html) => html,
        Err(err) => {
            eprintln!("gem2html: {}: {}", arg.to_string_lossy(), err);
            std::process::exit(1);
        }
    };

    if let Err(err) = io::copy(&mut html, &mut io::stdout()) {
        eprintln!("gem2html: {}: {}", arg.to_string_lossy(), err);
        std::process::exit(1);
    }
}

/// Per-document metadata read from an optional "```gem2html" block and used
/// when the HTML header is generated.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
struct FrontMatter {
    /// Text of the `<title>` element (from a `title: ` line), if any.
    title: Option<String>,
    /// Extra lines written verbatim inside `<head>` (from `head: ` lines).
    head: Vec<String>,
    /// Value of the `lang` attribute on `<html>` (from a `lang: ` line), if any.
    lang: Option<String>,
}

/// Position of the converter in its line-by-line state machine.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
    /// Nothing has been emitted yet; the HTML header is still to be written.
    Initial,
    /// Between blocks: the next line decides which element is produced.
    Normal,
    /// Inside a "```" block; lines are copied through until the closing fence.
    Preformatted,
    /// Inside a `<ul>` opened by a "* " line.
    List,
    /// Inside a `<blockquote>` opened by a "> " line.
    Quote,
    /// The closing `</body></html>` has been written; no more output follows.
    Finished,
}

/// Streaming Gemtext-to-HTML converter.
///
/// Input lines are pulled from `reader` one at a time; the HTML produced for
/// them is staged in `buf` and handed out through the `Read`/`BufRead`
/// implementations of this type.
struct Converter<R> {
    /// Source of the Gemtext input.
    reader: R,
    /// Metadata used when the HTML header is generated.
    front_matter: FrontMatter,
    /// Current position in the line-level state machine.
    state: State,
    /// Generated HTML bytes that have not been consumed yet.
    buf: VecDeque<u8>,
}

/// Opens the Gemtext file at `path` and wraps it in a [`Converter`] that
/// yields the corresponding HTML when read.
///
/// # Errors
///
/// Returns the underlying error if the file cannot be opened.
fn convert_file(
    path: impl AsRef<Path>,
) -> Result<Converter<BufReader<File>>, Box<dyn std::error::Error>> {
    let source = BufReader::new(File::open(path)?);
    Ok(Converter::new_with_maybe_frontmatter(source))
}

impl<R: BufRead + Seek> Converter<R> {
    /// Creates a converter over `reader`, first consuming a leading
    /// frontmatter block if the input starts with one.
    ///
    /// The frontmatter detection is delegated to
    /// `FrontMatter::with_maybe_frontmatter`, which is why the reader must be
    /// seekable.
    fn new_with_maybe_frontmatter(mut reader: R) -> Self {
        let front_matter = FrontMatter::with_maybe_frontmatter(&mut reader);
        let state = State::Initial;
        let buf = VecDeque::new();
        Self {
            reader,
            front_matter,
            state,
            buf,
        }
    }
}

impl<R: BufRead> Converter<R> {
    /// Creates a converter over a reader positioned at the start of a
    /// frontmatter block, consuming that block.
    ///
    /// It is a logic error to call this method if the reader is not at the beginning of a
    /// frontmatter block (i.e. the first line is "```gem2html", usually at the start of the file).
    ///
    /// # Panics
    ///
    /// Panics inside `FrontMatter::new_with_frontmatter` when the first line
    /// is not the opening fence.
    #[allow(dead_code)]
    fn with_frontmatter(mut reader: R) -> Result<Self, Box<dyn std::error::Error>> {
        let front_matter = FrontMatter::new_with_frontmatter(&mut reader);
        Ok(Self {
            reader,
            front_matter,
            state: State::Initial,
            buf: VecDeque::new(),
        })
    }

    /// Emits the HTML for a link line.
    ///
    /// `data` is the part of the line after the `=>` marker: a URL, optionally
    /// followed by whitespace (spaces or tabs) and a human-readable label.
    /// When no label is given, the URL itself is used as the link text.
    ///
    /// The templating placeholders `{{ext}}`, `{{proto}}`, `{{{}}` and `{{}}}`
    /// are expanded in the URL only.
    fn handle_link(&mut self, data: &str) -> io::Result<()> {
        let data = data.trim();
        // Gemtext separates URL and label by any run of spaces or tabs, so
        // split on the first such character and drop the rest of the run.
        let (raw_url, label) = match data.split_once(|c: char| c == ' ' || c == '\t') {
            Some((url, label)) => (url, label.trim_start()),
            None => (data, ""),
        };
        let url = raw_url
            .replace("{{ext}}", "html")
            .replace("{{proto}}", "https")
            .replace("{{{}}", "{")
            .replace("{{}}}", "}");
        let text = if label.is_empty() { url.as_str() } else { label };
        writeln!(&mut self.buf, "<p><a href=\"{url}\">=&gt; {text}</a></p>")?;
        Ok(())
    }

    /// Converts one input line and appends the resulting HTML to the buffer.
    ///
    /// `line` is expected in the form produced by `read_line`: it still
    /// carries its line terminator, and a completely empty string means the
    /// end of the input has been reached. In that case any open block is
    /// closed, the document trailer is written and the state becomes
    /// `Finished`; further calls are no-ops.
    fn do_line(&mut self, line: &str) -> io::Result<()> {
        if matches!(self.state, State::Finished) {
            // The trailer has already been written; never emit it twice.
            return Ok(());
        }

        if line.is_empty() {
            // End of input: close whatever is still open.
            match self.state {
                State::Initial => self.generate_header()?,
                State::Normal | State::Finished => {}
                State::Preformatted => writeln!(&mut self.buf, "</pre>")?,
                State::List => writeln!(&mut self.buf, "</ul>")?,
                State::Quote => writeln!(&mut self.buf, "</blockquote>")?,
            }
            self.state = State::Finished;
            writeln!(&mut self.buf, "</body>\n</html>")?;
            return Ok(());
        }

        match self.state {
            State::Initial => {
                self.generate_header()?;
                self.state = State::Normal;
                // The line that triggered the header is real content and must
                // be converted too, not discarded.
                self.do_line(line)?;
            }
            State::Normal => {
                if let Some(ty) = line.strip_prefix("```") {
                    self.state = State::Preformatted;
                    writeln!(&mut self.buf, "<pre alt=\"{}\">", ty.trim())?;
                } else if let Some(data) = line.strip_prefix("=>") {
                    self.handle_link(data.trim())?;
                } else if let Some(title) = line.strip_prefix("###") {
                    // Longest heading marker first, so "###" is not taken for "#".
                    writeln!(&mut self.buf, "<h3>### {}</h3>", title.trim())?;
                } else if let Some(title) = line.strip_prefix("##") {
                    writeln!(&mut self.buf, "<h2>## {}</h2>", title.trim())?;
                } else if let Some(title) = line.strip_prefix('#') {
                    writeln!(&mut self.buf, "<h1># {}</h1>", title.trim())?;
                } else if let Some(text) = line.strip_prefix("* ") {
                    self.state = State::List;
                    writeln!(&mut self.buf, "<ul>\n<li>{}</li>", text.trim())?;
                } else if let Some(text) = line.strip_prefix("> ") {
                    self.state = State::Quote;
                    writeln!(&mut self.buf, "<blockquote>\n<p>{}</p>", text.trim())?;
                } else {
                    writeln!(&mut self.buf, "<p>{}</p>", line.trim())?;
                }
            }
            State::Preformatted => {
                if line.trim().starts_with("```") {
                    self.state = State::Normal;
                    writeln!(&mut self.buf, "</pre>")?;
                } else {
                    // Copied through untouched, line terminator included.
                    self.buf.extend(line.as_bytes());
                }
            }
            State::List => {
                if let Some(text) = line.strip_prefix("* ") {
                    writeln!(&mut self.buf, "<li>{}</li>", text.trim())?;
                } else {
                    // Not a list item: close the list and reinterpret the
                    // line in the normal state.
                    self.state = State::Normal;
                    writeln!(&mut self.buf, "</ul>")?;
                    self.do_line(line)?;
                }
            }
            State::Quote => {
                if let Some(text) = line.strip_prefix("> ") {
                    writeln!(&mut self.buf, "<p>{}</p>", text.trim())?;
                } else {
                    // Not a quote line: close the quote and reinterpret the
                    // line in the normal state.
                    self.state = State::Normal;
                    writeln!(&mut self.buf, "</blockquote>")?;
                    self.do_line(line)?;
                }
            }
            State::Finished => {}
        }
        Ok(())
    }

    /// Reads the next line from the underlying reader and converts it.
    ///
    /// # Errors
    ///
    /// Propagates any error from `read_line` (including invalid UTF-8) or
    /// from writing into the output buffer.
    fn generate_next_line(&mut self) -> io::Result<()> {
        let mut line = String::new();
        self.reader.read_line(&mut line)?;
        self.do_line(&line)
    }
}

impl<R> Converter<R> {
    /// Appends the HTML prologue to the output buffer: doctype, the opening
    /// `<html>` tag (with a `lang` attribute when the frontmatter has one),
    /// the fixed `<head>` content, every extra head line from the
    /// frontmatter, and finally the opening `<body>` tag.
    ///
    /// The title falls back to "Untitled" when the frontmatter has none.
    fn generate_header(&mut self) -> io::Result<()> {
        let Self {
            front_matter, buf, ..
        } = self;

        let lang_attr = match front_matter.lang.as_deref() {
            Some(code) => format!(" lang=\"{}\"", code),
            None => String::new(),
        };
        let title = match front_matter.title.as_deref() {
            Some(title) => title,
            None => "Untitled",
        };

        writeln!(
            buf,
            r#"<!DOCTYPE html>
<html{}>
<head>
<meta charset="utf-8">
<title>{}</title>
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta http-equiv="x-ua-compatible" content="IE=edge">
<link href="/style.css" rel="stylesheet">
"#,
            lang_attr, title
        )?;

        // Only `title`, `lang` and the raw `head` lines exist in the
        // frontmatter; no author/description/keywords tags are produced.
        for extra in front_matter.head.iter() {
            writeln!(buf, "{}", extra)?;
        }

        writeln!(buf, "</head>\n<body>")
    }
}

impl<R: BufRead> Read for Converter<R> {
    /// Copies as much pending HTML as fits into `buf`, generating more output
    /// through `fill_buf` when nothing is pending, and returns the number of
    /// bytes copied.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let copied = {
            let pending = self.fill_buf()?;
            let count = pending.len().min(buf.len());
            buf[..count].copy_from_slice(&pending[..count]);
            count
        };
        self.consume(copied);
        Ok(copied)
    }
}

impl<R: BufRead> BufRead for Converter<R> {
    /// Returns the generated HTML that has not been consumed yet, converting
    /// further input lines when nothing is pending.
    ///
    /// An empty slice is returned only once the conversion is finished *and*
    /// every generated byte has been consumed, so output that is still
    /// buffered when the end of the input is reached is never dropped.
    fn fill_buf(&mut self) -> io::Result<&[u8]> {
        // Keep converting until there is something to hand out or the
        // document is complete.
        while self.buf.is_empty() && !matches!(self.state, State::Finished) {
            self.generate_next_line()?;
        }
        Ok(self.buf.make_contiguous())
    }

    /// Discards the first `amt` pending bytes.
    ///
    /// `amt` is clamped to the number of pending bytes so that an oversized
    /// value cannot cause a panic.
    fn consume(&mut self, amt: usize) {
        let amt = amt.min(self.buf.len());
        self.buf.drain(..amt);
    }
}

impl FrontMatter {
    /// Parses a frontmatter block from `reader`, consuming it up to and
    /// including the closing "```" line.
    ///
    /// Recognised lines are `title: …`, `lang: …` (last one wins) and
    /// `head: …` (every occurrence is kept, in order); anything else inside
    /// the block is ignored. If the input ends before the closing fence is
    /// seen, the block is treated as ending there.
    ///
    /// It is a logic error to call this method if the reader is not at the beginning of a
    /// frontmatter block (i.e. the first line is "```gem2html", usually at the start of the file).
    ///
    /// # Panics
    ///
    /// Panics if the first line is not "```gem2html" or if reading from
    /// `reader` fails.
    fn new_with_frontmatter<R: BufRead>(reader: R) -> Self {
        let mut reader = reader;
        let mut line = String::new();
        reader.read_line(&mut line).unwrap();
        if line.trim() != "```gem2html" {
            panic!("not at the beginning of a frontmatter block");
        }
        let mut title = None;
        let mut head = Vec::new();
        let mut lang = None;
        loop {
            line.clear();
            let bytes_read = reader.read_line(&mut line).unwrap();
            // `read_line` returns 0 at end of input; without this check an
            // unterminated block would spin here forever.
            if bytes_read == 0 || line.trim() == "```" {
                break;
            }
            if let Some(title_str) = line.strip_prefix("title: ") {
                title = Some(title_str.trim().to_string());
            } else if let Some(head_str) = line.strip_prefix("head: ") {
                head.push(head_str.trim().to_string());
            } else if let Some(lang_str) = line.strip_prefix("lang: ") {
                lang = Some(lang_str.trim().to_string());
            }
        }
        Self { title, head, lang }
    }

    /// Parses a frontmatter block if `reader` is positioned at one, and
    /// returns empty metadata otherwise.
    ///
    /// The first line is read to look for the "```gem2html" fence and the
    /// reader is then rewound to where it started, so when there is no
    /// frontmatter no input is consumed.
    ///
    /// # Panics
    ///
    /// Panics if querying the position, reading or seeking fails.
    fn with_maybe_frontmatter<R: BufRead + Seek>(reader: &mut R) -> Self {
        let orig_pos = reader.stream_position().unwrap();
        let mut line = String::new();
        reader.read_line(&mut line).unwrap();
        reader.seek(std::io::SeekFrom::Start(orig_pos)).unwrap();
        if line.trim() == "```gem2html" {
            Self::new_with_frontmatter(reader)
        } else {
            Self {
                title: None,
                head: Vec::new(),
                lang: None,
            }
        }
    }
}