@@ 15,10 15,11 @@
#include <slice.h>
#include <ast.h>
#include <parser_context.h>
+#include <stdio.h>
struct location {
- Byte* pos;
- Byte* end;
+ const Byte* pos; // read-only pointer; presumably the start of the located span — TODO confirm against YYLLOC_DEFAULT
+ const Byte* end; // read-only pointer; presumably the end of the located span — TODO confirm whether inclusive or one-past
};
#define YYLLOC_DEFAULT(Cur, Rhs, N) \
@@ 53,12 54,13 @@ struct location {
struct set_piece set_elem;
Glyph glyph;
char repeat;
+ const char* class_;
}
%token TOKEN_EOF
%token <glyph> TOKEN_LIT
%token <repeat> TOKEN_REPEAT
-%token <class> TOKEN_CLASS
+%token <class_> TOKEN_CLASS
%type <node> regex seq expr plain_expr repeatable_expr repeated_expr or atom
%type <set> set set_expr_list
@@ 240,29 242,44 @@ set_expr_list: set_expr
}
;
-class: '\\' class_
+class: TOKEN_CLASS
+ {
+ fprintf(stderr, "Got class: %s\n", $1); /* trace only: prints the class name carried by the token; this action sets no result value */
+ }
;
-class_: 'a'
- | 'b'
- | 'B'
- | 'e'
- | 'f'
- | 'v'
- | 'd'
- | 'D'
- | 's'
- | 'S'
- | 'w'
- | 'W'
- ;
-
%%
#include <stdio.h>
-int yylex(YYSTYPE* lvalp, struct location* locp, void* ctx, struct slice* src) {
+const char* known_classes[] = { // class names accepted directly after a backslash; try_parse_class scans them in this order, first prefix match wins
+ "e",
+ "v",
+ "d",
+ "D",
+ "s",
+ "S",
+ "w",
+ "W",
+ "N" // NOTE(review): the grammar rule this table replaces also listed a, b, B and f and had no N — confirm the set is intended
+};
+
+/*
+ * Checks whether the text at str (n bytes available) starts with one of the
+ * names in known_classes. The table is walked front to back and the first
+ * name that is a prefix of the input wins.
+ *
+ * On a match, *out is pointed at the matching table entry and the length of
+ * that name is returned. If nothing matches, 0 is returned and *out is left
+ * untouched.
+ */
+static size_t try_parse_class(const char* str, size_t n, const char** out) {
+    const size_t count = sizeof(known_classes) / sizeof(known_classes[0]);
+
+    for (size_t i = 0; i < count; ++i) {
+        const char* name = known_classes[i];
+        const size_t name_len = strlen(name);
+
+        /* A name longer than the remaining input cannot match. */
+        if (name_len > n) {
+            continue;
+        }
+        if (strncmp(name, str, name_len) != 0) {
+            continue;
+        }
+
+        *out = name;
+        return name_len;
+    }
+
+    return 0;
+}
+
+static int yylex(YYSTYPE* lvalp, struct location* locp, void* ctx, struct slice* src) {
struct parser_context* pctx = ctx;
const char** pos = &src->begin;
@@ 272,15 289,22 @@ int yylex(YYSTYPE* lvalp, struct location* locp, void* ctx, struct slice* src) {
return YYEOF;
}
- int escaped = 0;
switch (**pos) {
case '\\':
- escaped = 1;
+ {
++(*pos);
if (*pos == src->end) {
return YYEOF;
}
+ size_t len = try_parse_class(*pos, src->end - *pos, &lvalp->class_);
+ if (len) {
+ *pos += len;
+ return TOKEN_CLASS;
+ }
+ // roll back to the backslash so that it can be re-parsed as a glyph
+ --(*pos);
break;
+ }
case '|': ++(*pos); return '|';
case '+':
case '*':
@@ 296,6 320,8 @@ int yylex(YYSTYPE* lvalp, struct location* locp, void* ctx, struct slice* src) {
default: break;
}
+ // TODO: provide escape/control sequences like \w \t \n \f etc
+
// this is a lit
mbstate_t state;
memset(&state, '\0', sizeof(state));