M README.textile => README.textile +0 -5
@@ 22,8 22,3 @@ __"It should do what it says on the iron!"__
h2. Contributing
Find any "lice (bugs)":https://en.wikipedia.org/wiki/Clothes_iron#Hygiene ? I always appreciate "offers to fix up my flatiron":https://github.com/autumnull/flatiron/pulls !
-
-h3. TODO
-
-* Parsing
-** Punctuation transforms
A samples/textism.html => samples/textism.html +87 -0
@@ 0,0 1,87 @@
+<h2 style="color:green">This is a title</h2>
+<h3>This is a subhead</h3>
+<p style="color:red">This is some text of dubious character. Isn’t the use of “quotes” just lazy writing — and theft of ‘intellectual property’ besides? I think the time has come to see a block quote.</p>
+<blockquote lang="fr">This is a block quote. I’ll admit it’s not the most exciting block quote ever devised.</blockquote>
+<p>Simple list:</p>
+<ol style="color:blue">
+<li>one</li>
+<li>two</li>
+<li>three</li>
+</ol>
+<p>Multi-level list:</p>
+<ol>
+<li>one
+<ol>
+<li>aye</li>
+<li>bee</li>
+<li>see</li>
+</ol>
+</li>
+<li>two
+<ol>
+<li>x</li>
+<li>y</li>
+</ol>
+</li>
+<li>three</li>
+</ol>
+<p>Mixed list:</p>
+<ul>
+<li>Point one</li>
+<li>Point two
+<ol>
+<li>Step 1</li>
+<li>Step 2</li>
+<li>Step 3</li>
+</ol>
+</li>
+<li>Point three
+<ul>
+<li>Sub point 1</li>
+<li>Sub point 2</li>
+</ul>
+</li>
+</ul>
+<p>Well, that went well. How about we insert an <a href="http://www.textism.com/" title="watch out">old-fashioned hypertext link</a>? Will the quote marks in the tags get messed up? No!</p>
+<p><a href="http://www.textism.com" title="optional title">This is a link</a></p>
+<table style="border:1px solid black">
+<tr>
+<th>this</th>
+<th>is</th>
+<th>a</th>
+<th>header</th>
+</tr>
+<tr style="text-align: left; background:gray">
+<td colspan=2>this is</td>
+<td style="background:red;width:200px">a</td>
+<td style="text-align: justify;vertical-align: top; height:200px">row</td>
+</tr>
+<tr>
+<td>this</td>
+<td style="text-align: justify; padding:10px">is</td>
+<td style="vertical-align: top;">another</td>
+<td class="bob" id="bob">row</td>
+</tr>
+</table>
+<p>An image:</p>
+<p><img style="display: block; margin: auto;" src="images/flatiron.png" alt="optional alt text" /></p>
+<ol>
+<li>Librarians rule</li>
+<li>Yes they do</li>
+<li>But you knew that</li>
+</ol>
+<p>Some more text of dubious character. Here is a noisome string of <span class="caps">CAPITAL</span> letters. Here is something we want to <em>emphasize</em>.<br />
+That was a linebreak. And something to indicate <strong>strength</strong>. Of course I could use <em>my own <span class="caps">HTML</span> tags</em> if I <strong>felt</strong> like it.</p>
+<h3>Coding</h3>
+<p>This <code>is some code, "isn't it"</code>. Watch those quote marks! Now for some preformatted text:</p>
+<pre><code>$text = str_replace("<p>%::%</p>","",$text);
+$text = str_replace("%::%</p>","",$text);
+$text = str_replace("%::%","",$text);</code></pre>
+<p>This isn’t code.</p>
+<p>So you see, my friends:</p>
+<ul>
+<li>The time is now</li>
+<li>The time is not later</li>
+<li>The time is not yesterday</li>
+<li>We must act</li>
+</ul>
M src/parse/block.rs => src/parse/block.rs +3 -6
@@ 7,8 7,8 @@ use crate::structs::{
};
use nom::{
branch::alt,
- bytes::complete::{escaped_transform, tag, take_while1},
- character::complete::{char, line_ending, none_of},
+ bytes::complete::{escaped_transform, tag},
+ character::complete::{char, digit1, line_ending, none_of},
combinator::{complete, eof, fail, map_res, opt, value},
multi::many0_count,
sequence::{preceded, tuple},
@@ 177,10 177,7 @@ fn header_modifier(input: &str) -> IResult<&str, BlockKind> {
}
fn footnote_modifier(input: &str) -> IResult<&str, BlockKind> {
- let (rest, n) = preceded(
- tag("fn"),
- map_res(take_while1(|c: char| c.is_ascii_digit()), from_num),
- )(input)?;
+ let (rest, n) = preceded(tag("fn"), map_res(digit1, from_num))(input)?;
Ok((rest, BlockKind::Footnote(n)))
}
M src/parse/link.rs => src/parse/link.rs +1 -1
@@ 180,7 180,7 @@ mod tests {
"https://en.wikipedia.org/wiki/Magic_Johnson"
),
content: Box::new(InlineTag::Plaintext(String::from(
- "Earvin \"Magic\" Johnson (Basketball Player)"
+ "Earvin “Magic” Johnson (Basketball Player)"
)))
}
))
M src/parse/mod.rs => src/parse/mod.rs +5 -3
@@ 18,6 18,7 @@ mod link;
mod list;
mod no_textile;
mod phrase;
+mod punctuation;
mod table;
impl PhraseKind {
@@ 68,6 69,7 @@ impl InlineTag {
phrase::footnote_ref,
link::link,
image::image,
+ punctuation::punctuation,
]
}
}
@@ 237,7 239,7 @@ This is a paragraph with some _emphasized text_.";
indent: None,
align: None,
},
- content: InlineTag::Plaintext(String::from("This is some text of dubious character. Isn't the use of \"quotes\" just lazy writing -- and theft of 'intellectual property' besides? I think the time has come to see a block quote."))
+ content: InlineTag::Plaintext(String::from("This is some text of dubious character. Isn’t the use of “quotes” just lazy writing — and theft of ‘intellectual property’ besides? I think the time has come to see a block quote."))
},
BlockTag::Basic {
kind: BlockKind::BlockQuote,
@@ 251,7 253,7 @@ This is a paragraph with some _emphasized text_.";
indent: None,
align: None,
},
- content: InlineTag::Plaintext(String::from("This is a block quote. I'll admit it's not the most exciting block quote ever devised."))
+ content: InlineTag::Plaintext(String::from("This is a block quote. I’ll admit it’s not the most exciting block quote ever devised."))
},
BlockTag::Basic {
kind: BlockKind::Paragraph,
@@ 739,7 741,7 @@ This is a paragraph with some _emphasized text_.";
header: BlockHeader {attributes: None,
indent: None,
align: None,},
- content: InlineTag::Plaintext(String::from("This isn't code."))
+ content: InlineTag::Plaintext(String::from("This isn’t code."))
},
BlockTag::Basic {
kind: BlockKind::Paragraph,
M src/parse/phrase.rs => src/parse/phrase.rs +36 -11
@@ 1,10 1,12 @@
-use crate::parse::block::strip_flatiron_extended;
-use crate::parse::{acronym::acronym, attributes::attributes, from_num};
+use crate::parse::{
+ acronym::acronym, attributes::attributes, block::strip_flatiron_extended,
+ from_num,
+};
use crate::structs::{InlineTag, PhraseKind};
use nom::{
branch::alt,
- bytes::complete::{escaped_transform, take_while1},
- character::complete::{char, line_ending, none_of},
+ bytes::complete::escaped_transform,
+ character::complete::{char, digit1, line_ending, none_of},
combinator::{complete, fail, map_res, opt, value},
sequence::delimited,
IResult,
@@ 22,11 24,15 @@ pub fn phrase(input: &str) -> IResult<&str, InlineTag> {
let mut i = 0;
let mut preceding_space = true;
loop {
+ // combine plaintext phrases
+ combine_plaintext(&mut content);
+
// check if end of phrase has been reached
if &input[i..] == "" {
if i > 0 {
content.push(InlineTag::Plaintext(String::from(&input[..i])));
}
+ combine_plaintext(&mut content);
match content.len() {
0 => {
// return empty plaintext rather than phrase
@@ 110,12 116,17 @@ fn tagged_phrase(
if i >= input.len() {
return fail(input);
}
+
+ // combine plaintext phrases
+ combine_plaintext(&mut content);
+
// check if end of phrase has been reached
if let Ok((rest, _delimiter)) = kind.delimiter(&input[i..]) {
if i > 0 {
content
.push(InlineTag::Plaintext(String::from(&input[..i])));
}
+ combine_plaintext(&mut content);
return Ok((
rest,
InlineTag::Phrase {
@@ 172,6 183,23 @@ fn tagged_phrase(
}
}
+/// Merges adjacent `InlineTag::Plaintext` items at the end of `v`.
+///
+/// While the last two elements are both plaintext, the final one is popped
+/// and its text is appended to the element before it. Elements earlier in
+/// the vector are only touched once they become part of that trailing pair.
+fn combine_plaintext(v: &mut Vec<InlineTag>) {
+    loop {
+        let tail_is_plain_pair = matches!(
+            v.as_slice(),
+            [.., InlineTag::Plaintext(_), InlineTag::Plaintext(_)]
+        );
+        if !tail_is_plain_pair {
+            return;
+        }
+        // Both trailing items are plaintext: fold the last into its neighbour.
+        if let Some(InlineTag::Plaintext(last)) = v.pop() {
+            if let Some(InlineTag::Plaintext(prev)) = v.last_mut() {
+                prev.push_str(&last);
+            }
+        }
+    }
+}
+
pub fn line_break(input: &str) -> IResult<&str, InlineTag> {
let (rest, _) = line_ending(input)?;
Ok((rest, InlineTag::LineBreak))
@@ 191,11 219,8 @@ pub fn code(input: &str) -> IResult<&str, InlineTag> {
}
pub fn footnote_ref(input: &str) -> IResult<&str, InlineTag> {
- let (rest, n) = delimited(
- char('['),
- map_res(take_while1(|c: char| c.is_ascii_digit()), from_num),
- char(']'),
- )(input)?;
+ let (rest, n) =
+ delimited(char('['), map_res(digit1, from_num), char(']'))(input)?;
Ok((rest, InlineTag::FootnoteRef(n)))
}
@@ 485,7 510,7 @@ mod tests {
#[test]
fn phrase_with_attributes() {
- let input = "Look, it's %{color:red}red%";
+ let input = "Wow, %{color:red}red%";
let result = phrase(input);
assert_eq!(
result,
@@ 495,7 520,7 @@ mod tests {
kind: None,
attributes: None,
content: vec![
- InlineTag::Plaintext(String::from("Look, it's ")),
+ InlineTag::Plaintext(String::from("Wow, ")),
InlineTag::Phrase {
kind: Some(PhraseKind::Span),
attributes: Some(Attributes {
A src/parse/punctuation.rs => src/parse/punctuation.rs +114 -0
@@ 0,0 1,114 @@
+use crate::structs::InlineTag;
+use nom::{
+ branch::alt,
+ bytes::complete::tag,
+ character::complete::{char, digit1, satisfy},
+ combinator::{eof, map, opt, value},
+ sequence::tuple,
+ IResult,
+};
+
+/// Parses a single punctuation transform at the start of `input`.
+///
+/// The context-sensitive rules (`apostrophe`, `apostrophe2`,
+/// `single_closing`, `double_closing`, `dimension_sign`) are tried first, in
+/// that order, followed by a table of fixed replacements. The replacement
+/// text is returned wrapped in `InlineTag::Plaintext`; if no rule matches,
+/// the parser error is propagated.
+pub fn punctuation(input: &str) -> IResult<&str, InlineTag> {
+    // Fixed replacements that need no surrounding context. A bare `'` or `"`
+    // that reaches this table was rejected by the closing-quote rules, so it
+    // is rendered as an opening quote.
+    let fixed = alt((
+        value("‘", tag("'")),
+        value("“", tag("\"")),
+        value("…", tag("...")),
+        value("—", tag("--")),
+        value(" – ", tag(" - ")),
+        value("™", tag("(TM)")),
+        value("®", tag("(R)")),
+        value("©", tag("(C)")),
+    ));
+    let (rest, replaced) = alt((
+        apostrophe,
+        apostrophe2,
+        single_closing,
+        double_closing,
+        dimension_sign,
+        map(fixed, String::from),
+    ))(input)?;
+    Ok((rest, InlineTag::Plaintext(replaced)))
+}
+
+/// Matches `'` between two alphanumeric characters (as in `isn't`) and
+/// returns those three characters with the `'` replaced by `’`.
+fn apostrophe(input: &str) -> IResult<&str, String> {
+    let (rest, (before, _, after)) =
+        tuple((alphanumeric, char('\''), alphanumeric))(input)?;
+    let mut out = String::from(before);
+    out.push('’');
+    out.push(after);
+    Ok((rest, out))
+}
+
+/// Converts the apostrophe of an abbreviated number such as ` '90s` to `’`.
+///
+/// Matches one whitespace character, `'`, one or more digits (`digit1`) and
+/// an optional single alphanumeric character. The match must be followed
+/// either by the end of input or by one character that is neither
+/// alphanumeric nor `'`; that trailing character is consumed and copied to
+/// the output unchanged.
+///
+/// Returns the matched text with `'` replaced by `’`.
+fn apostrophe2(input: &str) -> IResult<&str, String> {
+    let (rest, (space, _, num, alphanum, end)) = tuple((
+        whitespace,
+        char('\''),
+        digit1,
+        opt(alphanumeric),
+        alt((
+            // Accept end of input as well, like `single_closing` and
+            // `double_closing` do. Without this a trailing ` '90s` fails
+            // here and falls through to the opening-quote rule (`‘`).
+            value(String::new(), eof),
+            map(
+                satisfy(|c| !(c.is_alphanumeric() || c == '\'')),
+                String::from,
+            ),
+        )),
+    ))(input)?;
+    let mut out = String::from(space);
+    out.push('’');
+    out.push_str(num);
+    // `Option<char>` yields zero or one characters.
+    out.extend(alphanum);
+    out.push_str(&end);
+    Ok((rest, out))
+}
+
+/// Matches a closing single quote: one non-whitespace character, then `'`,
+/// then either end of input or one non-alphanumeric character.
+///
+/// Returns the matched characters with `'` replaced by `’`; the trailing
+/// character, when present, is consumed and copied through unchanged.
+fn single_closing(input: &str) -> IResult<&str, String> {
+    let following = alt((
+        value(String::new(), eof),
+        map(non_alphanumeric, String::from),
+    ));
+    let (rest, (prev, _, next)) =
+        tuple((non_whitespace, char('\''), following))(input)?;
+    let mut out = String::from(prev);
+    out.push('’');
+    out.push_str(&next);
+    Ok((rest, out))
+}
+
+/// Matches a closing double quote: one non-whitespace character, then `"`,
+/// then either end of input or one non-alphanumeric character.
+///
+/// Returns the matched characters with `"` replaced by `”`; the trailing
+/// character, when present, is consumed and copied through unchanged.
+fn double_closing(input: &str) -> IResult<&str, String> {
+    let following = alt((
+        value(String::new(), eof),
+        map(non_alphanumeric, String::from),
+    ));
+    let (rest, (prev, _, next)) =
+        tuple((non_whitespace, char('"'), following))(input)?;
+    let mut out = String::from(prev);
+    out.push('”');
+    out.push_str(&next);
+    Ok((rest, out))
+}
+
+/// Consumes one character for which `char::is_alphanumeric` is true.
+fn alphanumeric(input: &str) -> IResult<&str, char> {
+    let accepts = |ch: char| ch.is_alphanumeric();
+    satisfy(accepts)(input)
+}
+
+/// Consumes one character for which `char::is_alphanumeric` is false.
+fn non_alphanumeric(input: &str) -> IResult<&str, char> {
+    let accepts = |ch: char| !ch.is_alphanumeric();
+    satisfy(accepts)(input)
+}
+
+/// Consumes one character for which `char::is_whitespace` is true.
+fn whitespace(input: &str) -> IResult<&str, char> {
+    let accepts = |ch: char| ch.is_whitespace();
+    satisfy(accepts)(input)
+}
+
+/// Consumes one character for which `char::is_whitespace` is false.
+fn non_whitespace(input: &str) -> IResult<&str, char> {
+    let accepts = |ch: char| !ch.is_whitespace();
+    satisfy(accepts)(input)
+}
+
+/// Matches a dimension expression such as `3x4` or `3 x 4` and returns it
+/// with the `x` replaced by `×`.
+///
+/// Each side is one or more digits (`digit1`); a single space is allowed on
+/// either side of the `x` and is preserved in the output.
+fn dimension_sign(input: &str) -> IResult<&str, String> {
+    let (rest, (lhs, gap_before, _, gap_after, rhs)) = tuple((
+        digit1,
+        opt(tag(" ")),
+        char('x'),
+        opt(tag(" ")),
+        digit1,
+    ))(input)?;
+    let mut out = String::from(lhs);
+    out.push_str(gap_before.unwrap_or(""));
+    out.push('×');
+    out.push_str(gap_after.unwrap_or(""));
+    out.push_str(rhs);
+    Ok((rest, out))
+}
A tests/integrated.rs => tests/integrated.rs +8 -0
@@ 0,0 1,8 @@
+use flatiron::convert;
+
+/// Converts the bundled Textile sample and compares the result with the
+/// stored HTML rendering.
+#[test]
+fn sample() {
+    let expected = include_str!("../samples/textism.html");
+    let source = include_str!("../samples/textism.textile");
+    let actual = convert(String::from(source));
+    assert_eq!(actual, expected)
+}