@@ 279,6 279,48 @@ static size_t try_parse_class(const char* str, size_t n, const char** out) {
return 0;
}
+/* One backslash escape: the character that follows the backslash (`from`)
+ * and the NUL-terminated text it stands for (`to`). */
+struct escape_seq_kv { char from; const char* to; };
+
+/*
+ * Escapes recognised by try_parse_escape_seq(). The table is never modified
+ * and is only read from this file, hence `static const`.
+ *
+ * ESC ('e') is left out: the entry was disabled because the "\e" spelling is
+ * a compiler extension rather than standard C. If it is wanted later, spell
+ * the replacement portably as "\x1b".
+ */
+static const struct escape_seq_kv escape_seq_map[] = {
+    { 't', "\t" },
+    { 'n', "\n" },
+    { 'r', "\r" },
+    { 'f', "\f" },
+    { 'a', "\a" },
+    { 'b', "\b" }
+};
+
+/*
+ * Look up the replacement text for the escape character `in`.
+ *
+ * Returns the string that escape_seq_map associates with `in`, or NULL when
+ * `in` is not in the table. The result points at a string literal, so the
+ * caller must neither modify nor free it.
+ */
+static const char* try_parse_escape_seq(char in) {
+    const size_t count = sizeof(escape_seq_map) / sizeof(escape_seq_map[0]);
+    for (size_t i = 0; i < count; ++i) {
+        if (escape_seq_map[i].from == in) {
+            return escape_seq_map[i].to;
+        }
+    }
+    return NULL;
+}
+
+/*
+ * Decode one multibyte character from the first `n` bytes of `str` into
+ * `*parsed`, using mbrtoc32() with a fresh conversion state.
+ *
+ * Returns the number of bytes the decoded character occupies, i.e. how far
+ * the caller should advance its read position. A decoded NUL character is
+ * reported as 1 byte rather than mbrtoc32()'s 0, so callers always make
+ * progress.
+ *
+ * Invalid, truncated, or surrogate-pair results trip an assert.
+ * NOTE(review): with NDEBUG the asserts compile away and mbrtoc32()'s error
+ * code ((size_t)-1, -2 or -3) is returned unchanged; callers add the result
+ * to a pointer, so release builds need real error handling here.
+ */
+static size_t try_parse_lit(const char* str, size_t n, Glyph* parsed) {
+    mbstate_t state;
+    memset(&state, 0, sizeof(state));
+
+    size_t r = mbrtoc32(parsed, str, n, &state);
+    switch (r) {
+    case (size_t)-1:
+        assert(0 && "invalid input");
+        break;
+    case (size_t)-2:
+        assert(0 && "truncated input");
+        break;
+    case (size_t)-3:
+        assert(0 && "no surrogate pairs");
+        break;
+    case 0:
+        // mbrtoc32() returns 0 when it decoded the NUL character; that
+        // character still occupies one input byte.
+        r = 1;
+        break;
+    default:
+        break;
+    }
+    return r;
+}
+
static int yylex(YYSTYPE* lvalp, struct location* locp, void* ctx, struct slice* src) {
struct parser_context* pctx = ctx;
const char** pos = &src->begin;
@@ 301,6 343,14 @@ static int yylex(YYSTYPE* lvalp, struct location* locp, void* ctx, struct slice*
*pos += len;
return TOKEN_CLASS;
}
+ const char* seq = try_parse_escape_seq(**pos);
+ if (seq) {
+ size_t seq_len = strlen(seq);
+ size_t r = try_parse_lit(seq, seq_len, &lvalp->glyph);
+ *pos += r;
+ return TOKEN_LIT;
+ }
+
// rollback, so that we could re-parse it as glyph
--(*pos);
break;
@@ 321,29 371,10 @@ static int yylex(YYSTYPE* lvalp, struct location* locp, void* ctx, struct slice*
}
// TODO: provide escape/control sequences like \w \t \n \f etc
-
- // this is a lit
- mbstate_t state;
- memset(&state, '\0', sizeof(state));
-
- Glyph glyph;
-
- size_t r = mbrtoc32(
- &glyph,
- *pos,
- src->end - *pos,
- &state
- );
- switch (r) {
- case (size_t)-1: assert(0 && "invalid input");
- case (size_t)-2: assert(0 && "truncated input");
- case (size_t)-3: assert(0 && "no surrogate pairs");
- default: break;
- }
+ size_t r = try_parse_lit(*pos, src->end - *pos, &lvalp->glyph);
*pos += r;
locp->pos = *pos;
- lvalp->glyph = glyph;
return TOKEN_LIT;
}