~autumnull/flatiron

55942b56a7e89d6f9dda4569d71b1c73dd175d26 — Autumn! 2 years ago 1796e28
Added conversions for punctuation
M README.textile => README.textile +0 -5
@@ 22,8 22,3 @@ __"It should do what it says on the iron!"__
h2. Contributing

Find any "lice (bugs)":https://en.wikipedia.org/wiki/Clothes_iron#Hygiene ? I always appreciate "offers to fix up my flatiron":https://github.com/autumnull/flatiron/pulls !

h3. TODO

* Parsing
** Punctuation transforms

A samples/textism.html => samples/textism.html +87 -0
@@ 0,0 1,87 @@
<h2 style="color:green">This is a title</h2>
<h3>This is a subhead</h3>
<p style="color:red">This is some text of dubious character. Isn’t the use of “quotes” just lazy writing — and theft of ‘intellectual property’ besides? I think the time has come to see a block quote.</p>
<blockquote lang="fr">This is a block quote. I’ll admit it’s not the most exciting block quote ever devised.</blockquote>
<p>Simple list:</p>
<ol style="color:blue">
<li>one</li>
<li>two</li>
<li>three</li>
</ol>
<p>Multi-level list:</p>
<ol>
<li>one
<ol>
<li>aye</li>
<li>bee</li>
<li>see</li>
</ol>
</li>
<li>two
<ol>
<li>x</li>
<li>y</li>
</ol>
</li>
<li>three</li>
</ol>
<p>Mixed list:</p>
<ul>
<li>Point one</li>
<li>Point two
<ol>
<li>Step 1</li>
<li>Step 2</li>
<li>Step 3</li>
</ol>
</li>
<li>Point three
<ul>
<li>Sub point 1</li>
<li>Sub point 2</li>
</ul>
</li>
</ul>
<p>Well, that went well. How about we insert an <a href="http://www.textism.com/" title="watch out">old-fashioned hypertext link</a>? Will the quote marks in the tags get messed up? No!</p>
<p><a href="http://www.textism.com" title="optional title">This is a link</a></p>
<table  style="border:1px solid black">
<tr>
<th>this</th>
<th>is</th>
<th>a</th>
<th>header</th>
</tr>
<tr style="text-align: left; background:gray">
<td colspan=2>this is</td>
<td style="background:red;width:200px">a</td>
<td style="text-align: justify;vertical-align: top; height:200px">row</td>
</tr>
<tr>
<td>this</td>
<td style="text-align: justify; padding:10px">is</td>
<td style="vertical-align: top;">another</td>
<td class="bob" id="bob">row</td>
</tr>
</table>
<p>An image:</p>
<p><img style="display: block; margin: auto;" src="images/flatiron.png" alt="optional alt text" /></p>
<ol>
<li>Librarians rule</li>
<li>Yes they do</li>
<li>But you knew that</li>
</ol>
<p>Some more text of dubious character. Here is a noisome string of <span class="caps">CAPITAL</span> letters. Here is something we want to <em>emphasize</em>.<br />
That was a linebreak. And something to indicate <strong>strength</strong>. Of course I could use <em>my own <span class="caps">HTML</span> tags</em> if I <strong>felt</strong> like it.</p>
<h3>Coding</h3>
<p>This <code>is some code, "isn't it"</code>. Watch those quote marks! Now for some preformatted text:</p>
<pre><code>$text = str_replace("&lt;p&gt;%::%&lt;/p&gt;","",$text);
$text = str_replace("%::%&lt;/p&gt;","",$text);
$text = str_replace("%::%","",$text);</code></pre>
<p>This isn’t code.</p>
<p>So you see, my friends:</p>
<ul>
<li>The time is now</li>
<li>The time is not later</li>
<li>The time is not yesterday</li>
<li>We must act</li>
</ul>

M src/parse/block.rs => src/parse/block.rs +3 -6
@@ 7,8 7,8 @@ use crate::structs::{
};
use nom::{
    branch::alt,
    bytes::complete::{escaped_transform, tag, take_while1},
    character::complete::{char, line_ending, none_of},
    bytes::complete::{escaped_transform, tag},
    character::complete::{char, digit1, line_ending, none_of},
    combinator::{complete, eof, fail, map_res, opt, value},
    multi::many0_count,
    sequence::{preceded, tuple},


@@ 177,10 177,7 @@ fn header_modifier(input: &str) -> IResult<&str, BlockKind> {
}

fn footnote_modifier(input: &str) -> IResult<&str, BlockKind> {
    let (rest, n) = preceded(
        tag("fn"),
        map_res(take_while1(|c: char| c.is_ascii_digit()), from_num),
    )(input)?;
    let (rest, n) = preceded(tag("fn"), map_res(digit1, from_num))(input)?;
    Ok((rest, BlockKind::Footnote(n)))
}


M src/parse/link.rs => src/parse/link.rs +1 -1
@@ 180,7 180,7 @@ mod tests {
                        "https://en.wikipedia.org/wiki/Magic_Johnson"
                    ),
                    content: Box::new(InlineTag::Plaintext(String::from(
                        "Earvin \"Magic\" Johnson (Basketball Player)"
                        "Earvin “Magic” Johnson (Basketball Player)"
                    )))
                }
            ))

M src/parse/mod.rs => src/parse/mod.rs +5 -3
@@ 18,6 18,7 @@ mod link;
mod list;
mod no_textile;
mod phrase;
mod punctuation;
mod table;

impl PhraseKind {


@@ 68,6 69,7 @@ impl InlineTag {
            phrase::footnote_ref,
            link::link,
            image::image,
            punctuation::punctuation,
        ]
    }
}


@@ 237,7 239,7 @@ This is a paragraph with some _emphasized text_.";
                    indent: None,
                    align: None,
                },
                content: InlineTag::Plaintext(String::from("This is some text of dubious character. Isn't the use of \"quotes\" just lazy writing -- and theft of 'intellectual property' besides? I think the time has come to see a block quote."))
                content: InlineTag::Plaintext(String::from("This is some text of dubious character. Isn’t the use of “quotes” just lazy writing — and theft of ‘intellectual property’ besides? I think the time has come to see a block quote."))
            },
            BlockTag::Basic {
                kind: BlockKind::BlockQuote,


@@ 251,7 253,7 @@ This is a paragraph with some _emphasized text_.";
                    indent: None,
                    align: None,
                },
                content: InlineTag::Plaintext(String::from("This is a block quote. I'll admit it's not the most exciting block quote ever devised."))
                content: InlineTag::Plaintext(String::from("This is a block quote. I’ll admit it’s not the most exciting block quote ever devised."))
            },
            BlockTag::Basic {
                kind: BlockKind::Paragraph,


@@ 739,7 741,7 @@ This is a paragraph with some _emphasized text_.";
                header: BlockHeader {attributes: None,
                indent: None,
                align: None,},
                content: InlineTag::Plaintext(String::from("This isn't code."))
                content: InlineTag::Plaintext(String::from("This isn’t code."))
            },
            BlockTag::Basic {
                kind: BlockKind::Paragraph,

M src/parse/phrase.rs => src/parse/phrase.rs +36 -11
@@ 1,10 1,12 @@
use crate::parse::block::strip_flatiron_extended;
use crate::parse::{acronym::acronym, attributes::attributes, from_num};
use crate::parse::{
    acronym::acronym, attributes::attributes, block::strip_flatiron_extended,
    from_num,
};
use crate::structs::{InlineTag, PhraseKind};
use nom::{
    branch::alt,
    bytes::complete::{escaped_transform, take_while1},
    character::complete::{char, line_ending, none_of},
    bytes::complete::escaped_transform,
    character::complete::{char, digit1, line_ending, none_of},
    combinator::{complete, fail, map_res, opt, value},
    sequence::delimited,
    IResult,


@@ 22,11 24,15 @@ pub fn phrase(input: &str) -> IResult<&str, InlineTag> {
    let mut i = 0;
    let mut preceding_space = true;
    loop {
        // combine plaintext phrases
        combine_plaintext(&mut content);

        // check if end of phrase has been reached
        if &input[i..] == "" {
            if i > 0 {
                content.push(InlineTag::Plaintext(String::from(&input[..i])));
            }
            combine_plaintext(&mut content);
            match content.len() {
                0 => {
                    // return empty plaintext rather than phrase


@@ 110,12 116,17 @@ fn tagged_phrase(
            if i >= input.len() {
                return fail(input);
            }

            // combine plaintext phrases
            combine_plaintext(&mut content);

            // check if end of phrase has been reached
            if let Ok((rest, _delimiter)) = kind.delimiter(&input[i..]) {
                if i > 0 {
                    content
                        .push(InlineTag::Plaintext(String::from(&input[..i])));
                }
                combine_plaintext(&mut content);
                return Ok((
                    rest,
                    InlineTag::Phrase {


@@ 172,6 183,23 @@ fn tagged_phrase(
    }
}

/// Merges a trailing run of adjacent `InlineTag::Plaintext` entries in `v`
/// into one entry.
///
/// Only the end of the vector is examined: for as long as the last two
/// elements are both plaintext, the last one is removed and its text is
/// appended to the element before it. Anything earlier in `v` is untouched.
fn combine_plaintext(v: &mut Vec<InlineTag>) {
    // Keep folding while the vector ends in two plaintext entries.
    while matches!(
        v.as_slice(),
        [.., InlineTag::Plaintext(_), InlineTag::Plaintext(_)]
    ) {
        if let Some(InlineTag::Plaintext(suffix)) = v.pop() {
            if let Some(InlineTag::Plaintext(prefix)) = v.last_mut() {
                prefix.push_str(&suffix);
            }
        }
    }
}

pub fn line_break(input: &str) -> IResult<&str, InlineTag> {
    let (rest, _) = line_ending(input)?;
    Ok((rest, InlineTag::LineBreak))


@@ 191,11 219,8 @@ pub fn code(input: &str) -> IResult<&str, InlineTag> {
}

pub fn footnote_ref(input: &str) -> IResult<&str, InlineTag> {
    let (rest, n) = delimited(
        char('['),
        map_res(take_while1(|c: char| c.is_ascii_digit()), from_num),
        char(']'),
    )(input)?;
    let (rest, n) =
        delimited(char('['), map_res(digit1, from_num), char(']'))(input)?;
    Ok((rest, InlineTag::FootnoteRef(n)))
}



@@ 485,7 510,7 @@ mod tests {

    #[test]
    fn phrase_with_attributes() {
        let input = "Look, it's %{color:red}red%";
        let input = "Wow, %{color:red}red%";
        let result = phrase(input);
        assert_eq!(
            result,


@@ 495,7 520,7 @@ mod tests {
                    kind: None,
                    attributes: None,
                    content: vec![
                        InlineTag::Plaintext(String::from("Look, it's ")),
                        InlineTag::Plaintext(String::from("Wow, ")),
                        InlineTag::Phrase {
                            kind: Some(PhraseKind::Span),
                            attributes: Some(Attributes {

A src/parse/punctuation.rs => src/parse/punctuation.rs +114 -0
@@ 0,0 1,114 @@
use crate::structs::InlineTag;
use nom::{
    branch::alt,
    bytes::complete::tag,
    character::complete::{char, digit1, satisfy},
    combinator::{eof, map, opt, value},
    sequence::tuple,
    IResult,
};

/// Parses one punctuation conversion at the start of `input` and returns the
/// converted text as an `InlineTag::Plaintext`.
///
/// The context-sensitive rules (`apostrophe`, `apostrophe2`,
/// `single_closing`, `double_closing`, `dimension_sign`) are tried first, in
/// that order. If none matches, the fixed replacements are tried:
/// `'` → `‘`, `"` → `“`, `...` → `…`, `--` → `—`, ` - ` → ` – `,
/// `(TM)` → `™`, `(R)` → `®`, `(C)` → `©`.
///
/// Fails with the error of the alternatives when nothing matches.
pub fn punctuation(input: &str) -> IResult<&str, InlineTag> {
    // Fixed one-to-one replacements that need no surrounding context. A bare
    // quote reaching this point is treated as an opening quote, because the
    // closing/apostrophe rules are tried before it.
    let literal = map(
        alt((
            value("‘", tag("'")),
            value("“", tag("\"")),
            value("…", tag("...")),
            value("—", tag("--")),
            value(" – ", tag(" - ")),
            value("™", tag("(TM)")),
            value("®", tag("(R)")),
            value("©", tag("(C)")),
        )),
        String::from,
    );

    let (remaining, converted) = alt((
        apostrophe,
        apostrophe2,
        single_closing,
        double_closing,
        dimension_sign,
        literal,
    ))(input)?;

    Ok((remaining, InlineTag::Plaintext(converted)))
}

/// Converts a straight apostrophe that sits directly between two
/// alphanumeric characters (as in `isn't`) into `’`.
///
/// All three characters are consumed; the returned string is the leading
/// character, the typographic apostrophe, and the trailing character.
fn apostrophe(input: &str) -> IResult<&str, String> {
    let (rest, (before, _, after)) =
        tuple((alphanumeric, char('\''), alphanumeric))(input)?;
    let converted: String = [before, '’', after].iter().collect();
    Ok((rest, converted))
}

fn apostrophe2(input: &str) -> IResult<&str, String> {
    let (rest, (space, apos, num, alphanum, end)) = tuple((
        whitespace,
        value('’', char('\'')),
        digit1,
        opt(alphanumeric),
        satisfy(|c| !(c.is_alphanumeric() || c == '\'')),
    ))(input)?;
    Ok((
        rest,
        format!(
            "{}{}{}{}{}",
            space,
            apos,
            num,
            match alphanum {
                Some(c) => String::from(c),
                None => String::new(),
            },
            end
        ),
    ))
}

fn single_closing(input: &str) -> IResult<&str, String> {
    let (rest, (non_space, apos, end)) = tuple((
        non_whitespace,
        value('’', char('\'')),
        alt((
            value(String::new(), eof),
            map(non_alphanumeric, |c| String::from(c)),
        )),
    ))(input)?;
    Ok((rest, format!("{}{}{}", non_space, apos, end)))
}

fn double_closing(input: &str) -> IResult<&str, String> {
    let (rest, (non_space, apos, end)) = tuple((
        non_whitespace,
        value('”', char('"')),
        alt((
            value(String::new(), eof),
            map(non_alphanumeric, |c| String::from(c)),
        )),
    ))(input)?;
    Ok((rest, format!("{}{}{}", non_space, apos, end)))
}

/// Consumes exactly one character for which `char::is_alphanumeric` is true.
fn alphanumeric(input: &str) -> IResult<&str, char> {
    satisfy(char::is_alphanumeric)(input)
}

/// Consumes exactly one character for which `char::is_alphanumeric` is false.
fn non_alphanumeric(input: &str) -> IResult<&str, char> {
    let is_other = |ch: char| !ch.is_alphanumeric();
    satisfy(is_other)(input)
}

/// Consumes exactly one character for which `char::is_whitespace` is true.
fn whitespace(input: &str) -> IResult<&str, char> {
    satisfy(char::is_whitespace)(input)
}

/// Consumes exactly one character for which `char::is_whitespace` is false.
fn non_whitespace(input: &str) -> IResult<&str, char> {
    let is_visible = |ch: char| !ch.is_whitespace();
    satisfy(is_visible)(input)
}

fn dimension_sign(input: &str) -> IResult<&str, String> {
    let (rest, (num0, space0, x, space1, num1)) = tuple((
        digit1,
        alt((tag(" "), tag(""))),
        value('×', char('x')),
        alt((tag(" "), tag(""))),
        digit1,
    ))(input)?;
    Ok((rest, format!("{}{}{}{}{}", num0, space0, x, space1, num1)))
}

A tests/integrated.rs => tests/integrated.rs +8 -0
@@ 0,0 1,8 @@
use flatiron::convert;

#[test]
fn sample() {
    // End-to-end check: converting the bundled Textile sample must reproduce
    // the bundled HTML sample exactly.
    let source = include_str!("../samples/textism.textile").to_string();
    let expected = include_str!("../samples/textism.html");
    assert_eq!(convert(source), expected);
}