From b9cecd744749bc5b3f5989182190f61929ebe5a4 Mon Sep 17 00:00:00 2001 From: Michael Rees Date: Thu, 12 Oct 2023 08:24:10 -0500 Subject: [PATCH] Move parser back into single file The only reason to separate the parser files was because my editor setup was having problems with the file size, but I've since discovered that those problems were due to a configuration mistake, so let's merge them together again. --- src/{parser/mod.rs => parser.rs} | 274 ++++++++++++++++++++++++++++++- src/parser/doc_config.rs | 131 --------------- src/parser/error.rs | 75 --------- src/parser/units.rs | 70 -------- 4 files changed, 267 insertions(+), 283 deletions(-) rename src/{parser/mod.rs => parser.rs} (85%) delete mode 100644 src/parser/doc_config.rs delete mode 100644 src/parser/error.rs delete mode 100644 src/parser/units.rs diff --git a/src/parser/mod.rs b/src/parser.rs similarity index 85% rename from src/parser/mod.rs rename to src/parser.rs index 9270cb3..a27f9a2 100644 --- a/src/parser/mod.rs +++ b/src/parser.rs @@ -1,6 +1,10 @@ use std::collections::HashMap; use std::sync::Arc; +use lazy_static::lazy_static; +use regex::Regex; +use thiserror::Error; + use crate::alignment::Alignment; use crate::fonts::Font; use crate::lexer::Token; @@ -9,13 +13,79 @@ use crate::tab::Tab; const DEFAULT_COL_GUTTER: f64 = 20.0; -mod doc_config; -mod error; -mod units; - -pub use doc_config::DocConfig; -pub use error::ParseError; -pub use units::{parse_unit, PointsVal}; +#[derive(Debug, Error)] +pub enum ParseError { + #[error("invalid align argument: {0}")] + InvalidAlign(String), + #[error("tokens left over at the end")] + ExtraTokens, + #[error("this feature not implemented yet")] + Unimplemented, + #[error("encountered unescaped [")] + UnescapedOpenBrace, + #[error("encountered unescaped ]")] + UnescapedCloseBrace, + #[error("encountered unescaped -")] + UnescapedHyphen, + #[error("unknown command: '{0}'")] + UnknownCommand(String), + #[error("malformed align command")] + MalformedAlign, + #[error("malformed bold command")] + MalformedBold, + #[error("malformed italic command")] + MalformedItalic, + #[error("invalid style block")] + InvalidStyleBlock, + #[error("expected to find more tokens, found EOF instead")] + EndedEarly, + #[error("malformed command with measure unit argument")] + MalformedUnitCommand, + #[error("invalid command encountered in document configuration")] + InvalidConfiguration, + #[error("invalid value {0} encountered when integer expected")] + InvalidInt(String), + #[error("invalid unit {0} encountered as measurement")] + InvalidBool(String), + #[error("invalid value {0} encountered when bool expected")] + InvalidUnit(String), + #[error("invalid command with string argument")] + MalformedStrCommand, + #[error("encountered reset command in invalid context")] + InvalidReset, + #[error("malformed quote command")] + MalformedQuote, + #[error("malformed open quote command")] + MalformedOpenQuote, + #[error("malformed smallcaps command")] + MalformedSmallcaps, + #[error("invalid command with integer argument")] + MalformedIntCommand, + #[error("malformed rule command")] + MalformedRule, + #[error("unsupported curly-brace argument")] + InvalidArgument, + #[error("malformed columns command")] + MalformedColumns, + #[error("tried to use relative argument for an unsupported command")] + InvalidRelative, + #[error("malformed define_tab command")] + MalformedDefineTab, + #[error("entered curly brace parser without curly brace")] + MissingCurlyBrace, + #[error("bad curly brace syntax")] + MalformedCurlyBrace, + #[error("invalid tab direction")] + InvalidTabDirection, + #[error("bad tab list syntax")] + MalformedTabList, + #[error("repeated tab definition for '{0}'")] + DuplicateTab(String), + #[error("repeated curly brace definition for '{0}'")] + DuplicateCurlyBraceKey(String), + #[error("malformed command with boolean argument")] + MalformedBoolCommand, +} #[derive(Debug, PartialEq)] pub enum Command { @@ -106,6 +176,130 @@ struct Argument { value: String, } +#[derive(Default, Debug, PartialEq)] +pub struct DocConfig { + pub margins: Option, + pub pt_size: Option, + pub page_width: Option, + pub page_height: Option, + pub leading: Option, + pub par_space: Option, + pub par_indent: Option, + pub space_width: Option, + pub family: Option, + pub font: Option, + pub indent_first: bool, + pub alignment: Option, + pub consecutive_hyphens: Option, + pub letter_space: Option, + pub tabs: Vec, + pub tab_lists: HashMap>, + pub ligatures: Option, +} + +impl DocConfig { + pub fn build() -> Self { + Self::default() + } + + pub fn with_margins(mut self, margins: f64) -> Self { + self.margins = Some(margins); + self + } + + pub fn with_pt_size(mut self, pt_size: f64) -> Self { + self.pt_size = Some(pt_size); + self + } + + pub fn with_page_height(mut self, height: f64) -> Self { + self.page_height = Some(height); + self + } + + pub fn with_page_width(mut self, width: f64) -> Self { + self.page_width = Some(width); + self + } + + pub fn with_leading(mut self, lead: f64) -> Self { + self.leading = Some(lead); + self + } + + pub fn with_par_space(mut self, space: f64) -> Self { + self.par_space = Some(space); + self + } + + pub fn with_par_indent(mut self, indent: f64) -> Self { + self.par_indent = Some(indent); + self + } + + pub fn with_space_width(mut self, width: f64) -> Self { + self.space_width = Some(width); + self + } + + pub fn with_family(mut self, family: String) -> Self { + self.family = Some(family); + self + } + + pub fn with_font(mut self, font: Font) -> Self { + self.font = Some(font); + self + } + + pub fn with_indent_first(mut self, indent_first: bool) -> Self { + self.indent_first = indent_first; + self + } + + pub fn with_alignment(mut self, alignment: Alignment) -> Self { + self.alignment = Some(alignment); + self + } + + pub fn with_consecutive_hyphens(mut self, hyphens: u64) -> Self { + self.consecutive_hyphens = Some(hyphens); + self + } + + pub fn with_letter_space(mut self, letter_space: f64) -> Self { + self.letter_space = Some(letter_space); + self + } + + pub fn with_ligatures(mut self, ligatures: bool) -> Self { + self.ligatures = Some(ligatures); + self + } + + pub fn add_tab(mut self, tab: Tab) -> Result { + let mut tab = tab; + + if tab.name.is_none() { + tab.name = Some(format!("{}", self.tabs.len() + 1)); + } + + if self.tabs.iter().any(|t| t.name == tab.name) { + return Err(ParseError::DuplicateTab( + tab.name.expect("all tab names should be set").clone(), + )); + } + + self.tabs.push(tab); + Ok(self) + } + + pub fn add_tab_list(mut self, list: Vec, name: String) -> Self { + self.tab_lists.insert(name, list); + self + } +} + fn pop_spaces(tokens: &[Token]) -> &[Token] { match tokens { [Token::Space, rest @ ..] => pop_spaces(rest), @@ -831,6 +1025,72 @@ pub fn parse_tokens(tokens: &[Token]) -> Result { } } +#[derive(Debug, PartialEq)] +pub enum PointsVal { + Static(f64), + Relative(f64), +} + +impl PointsVal { + pub fn value(&self) -> Result { + match self { + PointsVal::Relative(_) => Err(ParseError::InvalidRelative), + PointsVal::Static(val) => Ok(*val), + } + } +} + +// Internally, we keep everything in points, +// but we want to accept arguments in many units: +// points, picas, millimeters, inches, etc. +// (We'll add more units as needed.) +pub fn parse_unit(input: &str) -> Result { + lazy_static! { + static ref RE: Regex = Regex::new(r"^(?P[+-]?)(?P[\d\.]+)(?P[\w%]*)$") + .expect("should have a valid regex here"); + } + let caps = RE + .captures(input) + .ok_or(ParseError::InvalidUnit(input.to_string()))?; + let num = caps.name("num").expect("should have a matching group"); + let mut num = num + .as_str() + .parse::() + .map_err(|_| ParseError::InvalidInt(input.to_string()))?; + + let mut relative = false; + + if let Some(unit) = caps.name("unit") { + num = match unit.as_str() { + "pt" => num, + "in" => 72. * num, + "mm" => 2.83464576 * num, + "P" => 12. * num, + "" => num, + "%" => num / 100., + _ => return Err(ParseError::InvalidUnit(unit.as_str().to_string())), + }; + } + + if let Some(sign) = caps.name("sign") { + match sign.as_str() { + "" => {} + "+" => relative = true, + "-" => { + relative = true; + num *= -1.; + } + _ => unreachable!(), + }; + }; + + if relative { + Ok(PointsVal::Relative(num)) + } else { + Ok(PointsVal::Static(num)) + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/parser/doc_config.rs b/src/parser/doc_config.rs deleted file mode 100644 index 5cf0980..0000000 --- a/src/parser/doc_config.rs +++ /dev/null @@ -1,131 +0,0 @@ -use std::collections::HashMap; - -use crate::fonts::Font; -use crate::alignment::Alignment; -use crate::tab::Tab; - -use crate::parser::error::ParseError; - -#[derive(Default, Debug, PartialEq)] -pub struct DocConfig { - pub margins: Option, - pub pt_size: Option, - pub page_width: Option, - pub page_height: Option, - pub leading: Option, - pub par_space: Option, - pub par_indent: Option, - pub space_width: Option, - pub family: Option, - pub font: Option, - pub indent_first: bool, - pub alignment: Option, - pub consecutive_hyphens: Option, - pub letter_space: Option, - pub tabs: Vec, - pub tab_lists: HashMap>, - pub ligatures: Option, -} - -impl DocConfig { - pub fn build() -> Self { - Self::default() - } - - pub fn with_margins(mut self, margins: f64) -> Self { - self.margins = Some(margins); - self - } - - pub fn with_pt_size(mut self, pt_size: f64) -> Self { - self.pt_size = Some(pt_size); - self - } - - pub fn with_page_height(mut self, height: f64) -> Self { - self.page_height = Some(height); - self - } - - pub fn with_page_width(mut self, width: f64) -> Self { - self.page_width = Some(width); - self - } - - pub fn with_leading(mut self, lead: f64) -> Self { - self.leading = Some(lead); - self - } - - pub fn with_par_space(mut self, space: f64) -> Self { - self.par_space = Some(space); - self - } - - pub fn with_par_indent(mut self, indent: f64) -> Self { - self.par_indent = Some(indent); - self - } - - pub fn with_space_width(mut self, width: f64) -> Self { - self.space_width = Some(width); - self - } - - pub fn with_family(mut self, family: String) -> Self { - self.family = Some(family); - self - } - - pub fn with_font(mut self, font: Font) -> Self { - self.font = Some(font); - self - } - - pub fn with_indent_first(mut self, indent_first: bool) -> Self { - self.indent_first = indent_first; - self - } - - pub fn with_alignment(mut self, alignment: Alignment) -> Self { - self.alignment = Some(alignment); - self - } - - pub fn with_consecutive_hyphens(mut self, hyphens: u64) -> Self { - self.consecutive_hyphens = Some(hyphens); - self - } - - pub fn with_letter_space(mut self, letter_space: f64) -> Self { - self.letter_space = Some(letter_space); - self - } - - pub fn with_ligatures(mut self, ligatures: bool) -> Self { - self.ligatures = Some(ligatures); - self - } - - pub fn add_tab(mut self, tab: Tab) -> Result { - let mut tab = tab; - - if tab.name.is_none() { - tab.name = Some(format!("{}", self.tabs.len() + 1)); - } - - if self.tabs.iter().any(|t| t.name == tab.name) { - return Err(ParseError::DuplicateTab( - tab.name.expect("all tab names should be set").clone(), - )); - } - - self.tabs.push(tab); - Ok(self) - } - - pub fn add_tab_list(mut self, list: Vec, name: String) -> Self { - self.tab_lists.insert(name, list); - self - } -} diff --git a/src/parser/error.rs b/src/parser/error.rs deleted file mode 100644 index d95a2a3..0000000 --- a/src/parser/error.rs +++ /dev/null @@ -1,75 +0,0 @@ -use thiserror::Error; - -#[derive(Debug, Error)] -pub enum ParseError { - #[error("invalid align argument: {0}")] - InvalidAlign(String), - #[error("tokens left over at the end")] - ExtraTokens, - #[error("this feature not implemented yet")] - Unimplemented, - #[error("encountered unescaped [")] - UnescapedOpenBrace, - #[error("encountered unescaped ]")] - UnescapedCloseBrace, - #[error("encountered unescaped -")] - UnescapedHyphen, - #[error("unknown command: '{0}'")] - UnknownCommand(String), - #[error("malformed align command")] - MalformedAlign, - #[error("malformed bold command")] - MalformedBold, - #[error("malformed italic command")] - MalformedItalic, - #[error("invalid style block")] - InvalidStyleBlock, - #[error("expected to find more tokens, found EOF instead")] - EndedEarly, - #[error("malformed command with measure unit argument")] - MalformedUnitCommand, - #[error("invalid command encountered in document configuration")] - InvalidConfiguration, - #[error("invalid value {0} encountered when integer expected")] - InvalidInt(String), - #[error("invalid unit {0} encountered as measurement")] - InvalidBool(String), - #[error("invalid value {0} encountered when bool expected")] - InvalidUnit(String), - #[error("invalid command with string argument")] - MalformedStrCommand, - #[error("encountered reset command in invalid context")] - InvalidReset, - #[error("malformed quote command")] - MalformedQuote, - #[error("malformed open quote command")] - MalformedOpenQuote, - #[error("malformed smallcaps command")] - MalformedSmallcaps, - #[error("invalid command with integer argument")] - MalformedIntCommand, - #[error("malformed rule command")] - MalformedRule, - #[error("unsupported curly-brace argument")] - InvalidArgument, - #[error("malformed columns command")] - MalformedColumns, - #[error("tried to use relative argument for an unsupported command")] - InvalidRelative, - #[error("malformed define_tab command")] - MalformedDefineTab, - #[error("entered curly brace parser without curly brace")] - MissingCurlyBrace, - #[error("bad curly brace syntax")] - MalformedCurlyBrace, - #[error("invalid tab direction")] - InvalidTabDirection, - #[error("bad tab list syntax")] - MalformedTabList, - #[error("repeated tab definition for '{0}'")] - DuplicateTab(String), - #[error("repeated curly brace definition for '{0}'")] - DuplicateCurlyBraceKey(String), - #[error("malformed command with boolean argument")] - MalformedBoolCommand, -} diff --git a/src/parser/units.rs b/src/parser/units.rs deleted file mode 100644 index cc5c72b..0000000 --- a/src/parser/units.rs +++ /dev/null @@ -1,70 +0,0 @@ -use lazy_static::lazy_static; -use regex::Regex; - -use crate::parser::error::ParseError; - -#[derive(Debug, PartialEq)] -pub enum PointsVal { - Static(f64), - Relative(f64), -} - -impl PointsVal { - pub fn value(&self) -> Result { - match self { - PointsVal::Relative(_) => Err(ParseError::InvalidRelative), - PointsVal::Static(val) => Ok(*val), - } - } -} - -// Internally, we keep everything in points, -// but we want to accept arguments in many units: -// points, picas, millimeters, inches, etc. -// (We'll add more units as needed.) -pub fn parse_unit(input: &str) -> Result { - lazy_static! { - static ref RE: Regex = Regex::new(r"^(?P[+-]?)(?P[\d\.]+)(?P[\w%]*)$") - .expect("should have a valid regex here"); - } - let caps = RE - .captures(input) - .ok_or(ParseError::InvalidUnit(input.to_string()))?; - let num = caps.name("num").expect("should have a matching group"); - let mut num = num - .as_str() - .parse::() - .map_err(|_| ParseError::InvalidInt(input.to_string()))?; - - let mut relative = false; - - if let Some(unit) = caps.name("unit") { - num = match unit.as_str() { - "pt" => num, - "in" => 72. * num, - "mm" => 2.83464576 * num, - "P" => 12. * num, - "" => num, - "%" => num / 100., - _ => return Err(ParseError::InvalidUnit(unit.as_str().to_string())), - }; - } - - if let Some(sign) = caps.name("sign") { - match sign.as_str() { - "" => {} - "+" => relative = true, - "-" => { - relative = true; - num *= -1.; - } - _ => unreachable!(), - }; - }; - - if relative { - Ok(PointsVal::Relative(num)) - } else { - Ok(PointsVal::Static(num)) - } -} -- 2.45.2