~otheb/hl

75efed1bd357f4e5ca5a5cc653338ccb7e318631 — Olie Ayre 3 years ago 2a4dca7
Write program
1 files changed, 228 insertions(+), 0 deletions(-)

A hl.d
A hl.d => hl.d +228 -0
@@ 0,0 1,228 @@
module hl ;

import std.stdio     ;
import std.range     ;
import std.array     ;
import std.file      ;
import std.typecons  ;
import std.string    ;
import std.algorithm ;

import core.stdc.stdlib : exit ;

// Program entry point.
//
// Command line: -c|--config <config> [-o|--output <output>] [<input>]
// Loads the config, expands the markup found in the input (default
// /dev/stdin) and writes the result to the output file or, when no output
// file was given, to stdout. Every failure is reported through abort.
void main( string[] args ) {
	string config                ;
	string input  = "/dev/stdin" ;
	string output                ;
	bool   inputGiven = false    ;

	// args parsing
	if ( args.length == 1 ) {
		stderr.writeln( "Usage: " , args[0] ,
			" -c|--config <config> [-o|--output <output>] [<input>]" ) ;
		return ;
	}

	args.popFront ;

	// drops the option at the front of args and returns the value that
	// follows it; a missing, empty or option-like value is an error
	string optionValue( string complaint ) {
		args.popFront ;
		if ( args.empty || args.front.empty || args.front.startsWith( '-' ) )
			abort( complaint ) ;
		string value = args.front ;
		args.popFront ;
		return value ;
	}

	while ( ! args.empty ) {
		switch ( args.front ) {
			case "-c" , "--config" :
				config = optionValue(
					"Config file expected after -c|--config" ) ;
				break ;
			case "-o" , "--output" :
				output = optionValue(
					"Output file expected after -o|--output" ) ;
				break ;
			default :
				// startsWith is safe on an empty argument, indexing [0] is not
				if ( args.front.startsWith( '-' ) )
					abort( "Invalid option '" , args.front , "'" ) ;
				// track this explicitly so that naming the default path
				// on the command line still counts as an input file
				if ( inputGiven )
					abort( "Only one input file allowed" ) ;
				input      = args.front ;
				inputGiven = true ;
				args.popFront ;
				break ;
		}
	}

	if ( config.empty )
		abort( "No config file given, use -c|--config <config>" ) ;

	// read config
	// only Exception is caught: Errors signal bugs and must not be reported
	// as an unreadable file
	string configText ;
	try configText = config.readText ;
	catch ( Exception ) abort( "Config file '" , config ,
	                           "' does not exist or could not be read" ) ;
	auto conf = parseConfig( configText ) ;

	// parse file
	string source ;
	try source = input.readText ;
	catch ( Exception ) abort( "Input file '" , input ,
	                           "' does not exist or could not be read" ) ;
	string result = parseFile( source , conf ) ;
	if ( output.empty ) stdout.write( result ) ;
	else std.file.write( output , result ) ;
}

// Prints all arguments back to back on stderr, followed by a newline, and
// then ends the process with exit status 1.
void abort(T...)( T args ) {
	enum failure = 1 ;
	stderr.write( args , '\n' ) ;
	exit( failure ) ;
}

// A single rule: "start" is the text emitted when the rule opens and "end"
// the text emitted when it closes.
alias Rule = Tuple!( string , "start" , string , "end" ) ;

// A parsed config file: "rules" maps each one-character rule identifier to
// its Rule, "nesting" is true unless the config begins with a "#nonest" line.
alias Config = Tuple!( Rule[char] , "rules" , bool , "nesting" ) ;

// Turns the text of a config file into a Config.
//
// An optional first line "#nonest" switches nesting off. Every other line is
// one rule made of at least three tab-separated fields: a one-character
// identifier in A-Za-z, the start text and the end text (any further fields
// are ignored). Each invalid line is reported on stderr; if there was at
// least one, the program aborts with "Config has errors" once all lines have
// been checked.
Config parseConfig( string config ) {
	Rule[char] rules                       ;
	bool       nesting = true              ;
	bool       hadErr  = false             ;
	string[]   lines   = config.splitLines ;

	if ( ! lines.empty && lines.front == "#nonest" ) {
		nesting = false ;
		lines.popFront ;
	}

	// report a problem but keep going, so that every bad line gets listed
	void err(T...)( T args ) {
		stderr.writeln( args ) ;
		hadErr = true ;
	}

	foreach ( l ; lines ) {
		auto a = l.split( '\t' ) ;
		if ( a.length < 3 ) {
			err( "Config rule has too few arguments:\n\t" , l ) ;
			continue ;
		}
		// must be exactly one character: an empty first field (a line that
		// starts with a tab) would otherwise be indexed out of bounds below
		if ( a.front.length != 1 ) {
			err( "Rule identifier '" , a.front ,
			     "' may only be a single character:\n\t" , l ) ;
			continue ;
		}
		char id = a.front[0] ;
		if ( id in rules ) {
			err( "Rule identifier '" , id , "' appears more than once:\n\t" ,
			     l ) ;
			continue ;
		}
		if ( ! id.isRuleChar ) {
			err( "Rule identifier '" , id ,
			     "' may only be in the range A-Za-z:\n\t" , l ) ;
			continue ;
		}
		rules[id] = Rule( a[1] , a[2] ) ;
	}

	if ( hadErr ) abort( "Config has errors" ) ;

	return Config( rules , nesting ) ;
}

// Stack helper: removes the last element of a non-empty array by shortening
// the slice by one. Works on raw elements (no decoding for char arrays).
void pop(T)( ref T[] a ) {
	assert( a.length != 0 ) ;
	immutable remaining = a.length - 1 ;
	a = a[ 0 .. remaining ] ;
}

// Stack helper: returns the last element of a non-empty array without
// removing it. Works on raw elements (no decoding for char arrays).
T top(T)( T[] a ) {
	assert( a.length != 0 ) ;
	immutable last = a.length - 1 ;
	return a[last] ;
}

// Returns the character `offset` positions into s without consuming it, or
// '\0' when s has no character at that position (this includes the empty
// string). Checking the offset against the length, rather than only testing
// for emptiness, keeps look-ahead near the end of the input in bounds.
char peek( ref string s , size_t offset = 0 ) {
	if ( offset >= s.length ) return '\0' ;
	return s[offset] ;
}

// Consumes the first amnt characters (one by default) of the non-empty
// string s by moving the start of the slice forward.
void pop( ref string s , ulong amnt = 1 ) {
	assert( s.length != 0 ) ;
	s = s[ amnt .. s.length ] ;
}

// True when c may be used as a rule identifier, i.e. it is an ASCII letter
// in A-Z or a-z.
bool isRuleChar( char c ) {
	immutable upper = c >= 'A' && c <= 'Z' ;
	immutable lower = c >= 'a' && c <= 'z' ;
	return upper || lower ;
}

// Expands the markup in source using the rules of config and returns the
// resulting text.
//
// Markup:
//   %%    a plain '%'
//   %X    word rule X: stays open until the next whitespace, the next rule
//         start or the end of the input
//   %{X   start of region rule X
//   %}    end of the innermost open rule
//
// When config.nesting is false, opening a rule inside another one first
// emits the end text of the surrounding rule, and closing the inner rule
// emits the start text of the surrounding rule again.
//
// Aborts on an invalid or undefined rule identifier, on a "%}" while no rule
// is open, and on input that ends in the middle of a '%' token.
string parseFile( string source , Config config ) {
	enum : bool { word , region }
	string output   ;
	char[] stack    ; // identifiers of the open rules, innermost last
	bool[] type     ; // word or region for each entry of stack
	ulong  line = 1 ; // current input line, used in error messages

	bool hasStack() { return stack.length + type.length > 0 ; }
	bool inWord() { return hasStack && type.top == word ; }

	// declared before makeStart, which calls it
	void makeEnd() {
		if ( ! hasStack ) return ;
		output ~= config.rules[stack.top].end ;
		stack.pop ;
		type.pop  ;
		// handle no nesting: resume the rule that was interrupted
		if ( ! config.nesting && hasStack )
			output ~= config.rules[stack.top].start ;
	}
	void makeStart( bool isWord = false )( char i ) {
		if ( i !in config.rules )
			abort( "Undefined rule identifier '" , i , "' on line " , line ) ;
		// word rules end at all rule boundaries
		if ( inWord ) makeEnd ;
		// handle no nesting: suspend the surrounding rule
		if ( ! config.nesting && hasStack )
			output ~= config.rules[stack.top].end ;
		stack  ~= i                      ;
		type   ~= isWord ? word : region ;
		output ~= config.rules[i].start  ;
	}

	while ( ! source.empty ) {
		char c = source.peek ;
		switch ( c ) {
			case '%' :
				// every '%' token is at least two characters long
				if ( source.length < 2 )
					abort( "Unexpected end of input after '%' on line " ,
					       line ) ;
				if ( source.peek( 1 ) == '%' ) {
					// plain '%'
					output ~= '%' ;
					source.pop( 2 ) ;
				} else if ( source.peek( 1 ) == '}' ) {
					// end of region
					if ( ! hasStack )
						abort( "Unexpected region end token on line " , line ) ;
					makeEnd ;
					source.pop( 2 ) ;
				} else if ( source.peek( 1 ) == '{' ) {
					// start of region, the identifier is the third character
					if ( source.length < 3 )
						abort( "Unexpected end of input after '%{' on line " ,
						       line ) ;
					char i = source.peek( 2 ) ;
					if ( ! i.isRuleChar ) abort( "Invalid rule identifier '" ,
					                             i , "' on line " , line ) ;
					makeStart( i ) ;
					source.pop( 3 ) ;
				} else {
					// start of word
					char i = source.peek( 1 ) ;
					if ( ! i.isRuleChar ) abort( "Invalid rule identifier '" ,
					                             i , "' on line " , line ) ;
					makeStart!true( i ) ;
					source.pop( 2 ) ;
				}
				break ;
			case '\n' :
				line ++ ;
				goto case ; // a newline is whitespace too
			case ' ' , '\t' , '\r' :
				// whitespace ends an open word rule
				if ( inWord ) makeEnd ;
				goto default ;
			default :
				output ~= c ;
				source.pop ;
				break ;
		}
	}

	// a word rule that runs up to the end of the input ends there; makeStart
	// never stacks a word on a word, so a single makeEnd is enough
	if ( inWord ) makeEnd ;

	return output ;
}