~jack/misc

de3310a02bb24077ba36c783cb44a26df51f33f9 — Jack Kelly 6 months ago cf38d11
lambda-c: lexer, broken parser
M lambda-c/.gitignore => lambda-c/.gitignore +1 -0
@@ 11,4 11,5 @@ config.status
configure
m4/*.m4
src/lambda
src/lambda.c
stamp-h1

M lambda-c/configure.ac => lambda-c/configure.ac +2 -0
@@ 12,6 12,8 @@ AM_SILENT_RULES([yes])

# Checks for programs.
AC_PROG_CC
# Ragel generates src/lexer.c from src/lexer.rl at build time, so configure
# stops with an error when no ragel executable is found on PATH.
AC_PATH_PROG([RAGEL], [ragel])
AS_IF([test -z "$RAGEL"], [AC_MSG_ERROR([ragel is required.])])

# Checks for libraries.
PKG_CHECK_MODULES([GLib], [glib-2.0])

M lambda-c/src/Makefile.am => lambda-c/src/Makefile.am +9 -0
@@ 5,5 5,14 @@ LDADD = $(GLib_LIBS)
bin_PROGRAMS = lambda

lambda_SOURCES = lambda.c \
	lexer.h \
	eval.c eval.h \
	parser.c parser.h \
	term.c term.h

# lexer.c is generated from lexer.rl by Ragel.  The .rl source is shipped in
# the distribution, the generated .c is not, and "make clean" removes it.
EXTRA_DIST = lexer.rl
CLEANFILES = lexer.c
nodist_lambda_SOURCES = lexer.c

# Regenerate the C lexer whenever its Ragel specification changes.
lexer.c: lexer.rl
	$(AM_V_GEN)$(RAGEL) -C $< -o $@

M lambda-c/src/lambda.c => lambda-c/src/lambda.c +7 -9
@@ 19,21 19,19 @@
#include "config.h"

#include "eval.h"
#include "lexer.h"
#include "term.h"

#include <glib.h>
#include <stdio.h>

#include "lexer.h"
#include "parser.h"

int main(int argc, char *argv[]) {
  g_autoptr(Term) t =
    term_app(term_app(term_lam("w",
                               term_lam("x",
                                        term_lam("y",
                                                 term_lam("z",
                                                          term_app(term_var("x"),
                                                                   term_var("w")))))),
                      term_var("y")),
             term_var("z"));
  g_autoptr(GArray) tokens = lex("(\\x . x)");
  g_autoptr(Term) t = parse(tokens);

  term_fput(t, stdout);
  puts("");
  g_autoptr(Term) e = eval(t);

A lambda-c/src/lexer.h => lambda-c/src/lexer.h +29 -0
@@ 0,0 1,29 @@
#ifndef LEXER_H
#define LEXER_H

#include <glib.h>

/* Kinds of token produced by lex(). */
enum token_type {
  TOKEN_LAMBDA, /* a backslash, introducing an abstraction */
  TOKEN_DOT, /* '.' */
  TOKEN_LPAREN, /* '(' */
  TOKEN_RPAREN, /* ')' */
  TOKEN_VAR /* a run of one or more alphabetic characters */
};

/* One lexed token.  v is only meaningful when type is TOKEN_VAR; in that
   case it is a string owned by the token and released by token_clear(). */
struct token {
  enum token_type type;
  gchar *v;
};

/* Initializers: each one sets t->type to the matching TOKEN_* kind. */
void token_lambda(struct token *t);
void token_dot(struct token *t);
void token_lparen(struct token *t);
void token_rparen(struct token *t);
void token_var(struct token *t, gchar *v); /* takes ownership of v */
/* Frees t->v when t is a TOKEN_VAR; never frees t itself. */
void token_clear(struct token *t);
/* Prints a one-line description of t to stdout. */
void token_puts(const struct token *t);

/* Tokenizes the NUL-terminated string data.  Returns a newly allocated
   GArray of struct token whose clear function is set up to release each
   token's string. */
GArray* lex(const gchar *data);

#endif

A lambda-c/src/lexer.rl => lambda-c/src/lexer.rl +115 -0
@@ 0,0 1,115 @@
/* -*- c -*- */
#include "config.h"
#include "lexer.h"

#include <stdio.h>
#include <string.h>

%%{

# Scanner for the lambda-calculus surface syntax.  Every action fills the
# scratch token t and appends a copy of it to the result array r; both are
# locals of lex(), where the generated scanner code is expanded.

machine main;

# Backslash, dot and parentheses are single-character tokens, a run of
# alphabetic characters is a variable, and whitespace produces no token.
main := |*
  '\\' => {
    token_lambda(&t);
    g_array_append_val(r, t);
  };
  '.' => {
    token_dot(&t);
    g_array_append_val(r, t);
  };
  '(' => {
    token_lparen(&t);
    g_array_append_val(r, t);
  };
  ')' => {
    token_rparen(&t);
    g_array_append_val(r, t);
  };
  alpha+ => {
    token_var(&t, g_strndup(ts, te - ts));
    g_array_append_val(r, t);
  };
  space;
*|;

}%%

/* Ragel expands the next directive into the scanner's static data.
   Presumably not every generated constant is referenced by this file, hence
   the locally silenced unused-const-variable warning. */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-const-variable"
%% write data;
#pragma GCC diagnostic pop

/* Makes *t a TOKEN_LAMBDA token.
   v is reset to NULL so that a non-variable token never carries an
   indeterminate pointer or an alias of a string owned by another token
   (lex() reuses one scratch token for every match). */
void
token_lambda(struct token *t) {
  t->type = TOKEN_LAMBDA;
  t->v = NULL;
}

/* Makes *t a TOKEN_DOT token.
   v is reset to NULL so that a non-variable token never carries an
   indeterminate pointer or an alias of a string owned by another token
   (lex() reuses one scratch token for every match). */
void
token_dot(struct token *t) {
  t->type = TOKEN_DOT;
  t->v = NULL;
}

/* Makes *t a TOKEN_LPAREN token.
   v is reset to NULL so that a non-variable token never carries an
   indeterminate pointer or an alias of a string owned by another token
   (lex() reuses one scratch token for every match). */
void
token_lparen(struct token *t) {
  t->type = TOKEN_LPAREN;
  t->v = NULL;
}

/* Makes *t a TOKEN_RPAREN token.
   v is reset to NULL so that a non-variable token never carries an
   indeterminate pointer or an alias of a string owned by another token
   (lex() reuses one scratch token for every match). */
void
token_rparen(struct token *t) {
  t->type = TOKEN_RPAREN;
  t->v = NULL;
}

/* Makes *t a TOKEN_VAR token named v.  The token takes ownership of v;
   token_clear() later releases it with g_free(). */
void
token_var(struct token *t, gchar *v) {
  *t = (struct token) { .type = TOKEN_VAR, .v = v };
}

/* Releases the string held by a TOKEN_VAR token.  Tokens of any other kind
   own nothing and are left untouched; t itself is never freed. */
void
token_clear(struct token *t) {
  if (t->type != TOKEN_VAR) {
    return;
  }
  g_free(t->v);
}

/* Writes a one-line, human-readable description of t to stdout: the kind
   name, plus the variable name in parentheses for TOKEN_VAR. */
void
token_puts(const struct token *t) {
  switch (t->type) {
  case TOKEN_VAR:
    printf("Var(%s)\n", t->v);
    break;
  case TOKEN_LAMBDA:
    fputs("Lambda\n", stdout);
    break;
  case TOKEN_DOT:
    fputs("Dot\n", stdout);
    break;
  case TOKEN_LPAREN:
    fputs("Lparen\n", stdout);
    break;
  case TOKEN_RPAREN:
    fputs("Rparen\n", stdout);
    break;
  default:
    g_assert_not_reached();
  }
}

/* Adapter with the gpointer-taking signature expected by
   g_array_set_clear_func(): data points at the struct token being
   released, which is handed on to token_clear(). */
static void
token_destroy_notify(gpointer data) {
  struct token *tok = data;
  token_clear(tok);
}

/* Tokenizes the NUL-terminated string data and returns the tokens, in input
   order, as a newly allocated GArray of struct token.  The array's clear
   function is token_destroy_notify, so each element is passed to
   token_clear() when GLib releases it.
   NOTE(review): the final scanner state (cs) and position (p) are not
   inspected after the scanner runs, so input the scanner cannot match does
   not appear to be reported to the caller -- confirm this is intended. */
GArray*
lex(const gchar *data) {
  /* cs, act, ts, te, p, pe and eof are the names the Ragel-generated scanner
     code refers to; r and t are used by the actions in the machine
     definition.  Do not rename any of them. */
  gint cs;
  /* Presumably act is assigned but never read by the generated code for this
     machine, hence the locally silenced warning -- TODO confirm. */
  #pragma GCC diagnostic push
  #pragma GCC diagnostic ignored "-Wunused-but-set-variable"
  gint act;
  #pragma GCC diagnostic pop
  /* The whole input is available at once: pe and eof both point at the
     terminating NUL. */
  const gchar *ts, *te, *p = data, *pe = data + strlen(data), *eof = pe;
  GArray *r = g_array_new(FALSE, FALSE, sizeof(struct token));
  g_array_set_clear_func(r, token_destroy_notify);
  /* Scratch token, filled by each action and copied into r. */
  struct token t;
  %% write init;
  %% write exec;
  return r;
}

A lambda-c/src/parser.c => lambda-c/src/parser.c +70 -0
@@ 0,0 1,70 @@
#include "config.h"
#include "parser.h"

#include "lexer.h"

/* Cursor over the token array being parsed. */
struct parser_state {
  const GArray *tokens; /* array of struct token; read-only for the parser */
  guint token_idx; /* index of the next token to be consumed */
};

static const struct token*
peek_token(struct parser_state *st) {
  g_assert(st->token_idx < st->tokens->len);
  return &g_array_index(st->tokens, const struct token, st->token_idx);
}

/* Consumes the current token and returns it.  The position is advanced
   first; the process is then aborted if the token's kind is not expected. */
static const struct token*
expect_token(struct parser_state *st, enum token_type expected) {
  const struct token *tok = peek_token(st);
  st->token_idx += 1;
  if (tok->type == expected) {
    return tok;
  }
  abort();
}

static Term* parse_rec(struct parser_state *st);

/* Parses a single variable: consumes one TOKEN_VAR and builds a variable
   term from its name. */
static Term* parse_var(struct parser_state *st) {
  return term_var(expect_token(st, TOKEN_VAR)->v);
}

/* Parses two consecutive terms and combines them into an application.
   The function term is parsed strictly before the argument term, so the
   two calls must stay as separate statements.
   NOTE(review): parse_rec() in this file never calls this function. */
static Term* parse_app(struct parser_state *st) {
  Term *fn = parse_rec(st);
  Term *arg = parse_rec(st);
  return term_app(fn, arg);
}

/* Parses an abstraction of the form: LAMBDA VAR DOT term.  Builds a lambda
   term from the bound variable's name and the parsed body. */
static Term* parse_lam(struct parser_state *st) {
  expect_token(st, TOKEN_LAMBDA);
  const struct token *binder = expect_token(st, TOKEN_VAR);
  expect_token(st, TOKEN_DOT);
  Term *body = parse_rec(st);
  return term_lam(binder->v, body);
}

static Term* parse_rec(struct parser_state *st) {
  const struct token *t = peek_token(st);
  switch (t->type) {
  case TOKEN_VAR: return parse_var(st);
  case TOKEN_LAMBDA: return parse_lam(st);
  case TOKEN_LPAREN: {
    expect_token(st, TOKEN_LPAREN);
    Term *r = parse_rec(st);
    expect_token(st, TOKEN_RPAREN);
    return r;
  }
  case TOKEN_RPAREN:
  case TOKEN_DOT:
    abort();
  default: g_assert_not_reached();
  }
}

parse(const GArray *tokens) {
  struct parser_state st = {
    .tokens = tokens,
    .token_idx = 0
  };
  return parse_rec(&st);
}

A lambda-c/src/parser.h => lambda-c/src/parser.h +10 -0
@@ 0,0 1,10 @@
#ifndef PARSER_H
#define PARSER_H

#include <glib.h>

#include "term.h"

/* Parses a single term from tokens, starting at the first token, and
   returns it.  The process is aborted on an unexpected token or when the
   tokens run out. */
Term* parse(const GArray /* of struct token */ *tokens);

#endif