~donmcc/ood

7173f905d5d3c917f3ebdfd789c901ac0ec80f67 — Don McCaughey 1 year, 8 months ago f86ce16 request-parser
A spike to investigate HTTP request parsing.

Experiments with a `request` struct to represent a parsed HTTP request
and corresponding parsing functions using recursive descent.

Added experimental `buffer` struct for managing the position of the
input buffer being parsed.
M src/dood/CMakeLists.txt => src/dood/CMakeLists.txt +5 -1
@@ 2,6 2,8 @@ add_executable(dood
        main.c
        server.h
        server.c
        request.h
        request.c
        )
target_link_libraries(dood
        libood


@@ 11,7 13,9 @@ add_executable(dood_tests
        dood_tests.c
        server.h
        server.c
        )
        request.h
        request.c
        request_test.c)
target_link_libraries(dood_tests
        libood
        )

M src/dood/dood_tests.c => src/dood/dood_tests.c +4 -0
@@ 1,8 1,12 @@
#include <stdlib.h>


// Entry point of the request test suite; declared here because no test header is included.
void request_test(void);


// Test-runner entry point: runs the request tests and returns EXIT_SUCCESS.
// The command-line arguments are not used.
int
main(int argc, char *argv[])
{
    request_test();
    return EXIT_SUCCESS;
}

A src/dood/request.c => src/dood/request.c +424 -0
@@ 0,0 1,424 @@
#include "request.h"

#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <stdio.h>
#include "ood/ptr_array.h"


// Reports whether the buffer's read index currently sits on its saved mark.
static bool
index_is_at_mark(struct buffer *buffer)
{
    int const position = buffer->i;
    int const saved = buffer->mark;
    return position == saved;
}


// Allocates a zero-initialised buffer describing `size` bytes at `data`.
// Only the pointer is stored; the bytes themselves are not copied.
// Returns NULL when the allocation fails.
struct buffer *
buffer_alloc(char *data, int size)
{
    struct buffer *result = calloc(1, sizeof *result);
    if (result != NULL) {
        result->data = data;
        result->size = size;
    }
    return result;
}


// Releases the buffer struct itself. The `data` pointer it refers to is not
// freed here.
void
buffer_free(struct buffer *buffer)
{
    free(buffer);
}


// Returns the byte at the read index as a non-negative int and advances the
// index by one. Returns -1, without moving the index, once the index is at
// (or outside) the end of the data.
int
buffer_getc(struct buffer *buffer)
{
    // buffer_mark_at() accepts any int, so treat every out-of-range index as
    // end of input instead of reading outside `data`.
    if (buffer->i < 0 || (size_t)buffer->i >= buffer->size) return -1;

    // Convert through unsigned char, as fgetc() does, so that a 0xFF byte on
    // a signed-char platform cannot be mistaken for the -1 end marker.
    return (unsigned char)buffer->data[buffer->i++];
}


// Rewinds the read index to the saved mark; the mark itself is unchanged.
void
buffer_goto_mark(struct buffer *buffer)
{
    buffer->i = buffer->mark;
}


// Moves both the read index and the mark to position `i`.
// No range check is performed on `i`.
void
buffer_mark_at(struct buffer *buffer, int i)
{
    buffer->i = buffer->mark = i;
}


// Saves the current read index as the mark.
void
buffer_set_mark(struct buffer *buffer)
{
    buffer->mark = buffer->i;
}


// Tries to consume the non-empty literal `s` starting at the mark.
// On a full match the index and mark both end up just past the literal and
// true is returned. On any mismatch (including running out of data) the index
// is rewound to the mark and false is returned.
// Precondition (asserted): the index is at the mark on entry.
bool
buffer_matches_string(struct buffer *buffer, char const *s)
{
    assert(index_is_at_mark(buffer));
    assert(s);
    assert(*s);

    for (char const *expected = s; *expected != '\0'; ++expected) {
        int const actual = buffer_getc(buffer);
        if (actual != *expected) {
            buffer_goto_mark(buffer);
            return false;
        }
    }

    buffer_set_mark(buffer);
    return true;
}


// Scans forward from the read index while `is_ctype` accepts each byte and
// returns the index of the first byte that is NOT accepted (or the data size
// if every remaining byte is accepted). Despite the name, the result is one
// past the last matching byte; it equals the current index when nothing
// matches. The buffer's index and mark are not modified.
// Precondition (asserted): the index is at the mark on entry.
int
buffer_last_index_of_ctype(struct buffer *buffer, is_ctype_fn is_ctype)
{
    assert(index_is_at_mark(buffer));

    int i = buffer->i;
    // Callers may pass <ctype.h> functions such as isalpha(), whose argument
    // must be representable as unsigned char; plain char may be negative, so
    // convert each byte before classifying it.
    while ((size_t)i < buffer->size
           && is_ctype((unsigned char)buffer->data[i])) {
        ++i;
    }
    return i;
}


// Advances the index and mark past the run of bytes accepted by `is_ctype`.
// Returns true when at least one byte was skipped.
bool
buffer_skip_token_matching_ctype(struct buffer *buffer, is_ctype_fn is_ctype)
{
    int const start = buffer->i;
    int const stop = buffer_last_index_of_ctype(buffer, is_ctype);
    buffer_mark_at(buffer, stop);
    return stop > start;
}


// Copies the run of bytes accepted by `is_ctype` at the read index into a
// newly malloc'd, NUL-terminated string and moves the index and mark past it.
// Returns NULL, leaving the buffer untouched, when the run is empty or the
// allocation fails. The caller owns the returned string and must free() it.
char *
buffer_alloc_token_matching_ctype(struct buffer *buffer, is_ctype_fn is_ctype)
{
    int const start = buffer->i;
    int const stop = buffer_last_index_of_ctype(buffer, is_ctype);
    int const length = stop - start;
    if (length == 0) return NULL;

    char *token = malloc(length + 1);
    if (token == NULL) return NULL;

    memcpy(token, &buffer->data[start], length);
    token[length] = '\0';
    buffer_mark_at(buffer, stop);
    return token;
}


// Allocates a zero-initialised message_header (both strings NULL).
// Returns NULL when the allocation fails.
struct message_header *
message_header_alloc(void)
{
    return calloc(1, sizeof(struct message_header));
}


// Frees a message_header together with its name and content strings.
// Passing NULL is a no-op.
void
message_header_free(struct message_header *message_header)
{
    if (message_header == NULL) return;

    free(message_header->field_name);
    free(message_header->field_content);
    free(message_header);
}


// Allocates a zero-initialised request with an empty header array.
// Returns NULL if either the request or its header array cannot be allocated.
struct request *
request_alloc(void)
{
    struct request *request = calloc(1, sizeof *request);
    if (request == NULL) return NULL;

    request->message_headers = ood_ptr_array_alloc(16);
    if (request->message_headers != NULL) return request;

    // Header array allocation failed: undo the partial construction.
    free(request);
    return NULL;
}


// Frees a request and everything it owns: the request URI, every header
// reachable through ood_ptr_array_next(), the header array, and the body
// octets. Passing NULL is a no-op.
void
request_free(struct request *request)
{
    if (request == NULL) return;

    free(request->request_line.request_uri);

    struct message_header *message_header = NULL;
    for (int i = 0;
         ood_ptr_array_next(request->message_headers, &i, (void const **)&message_header);
         ) {
        message_header_free(message_header);
    }
    ood_ptr_array_free(request->message_headers);

    free(request->message_body.octets);
    free(request);
}


// CRLF = "\r\n". Consumes the pair when present; `request` is not used.
static enum ood_result
parse_crlf(struct request *request, struct buffer *buffer)
{
    (void)request;
    if (buffer_matches_string(buffer, "\r\n")) return ood_okay;
    return ood_error;
}


// SP = " ". Consumes exactly one space when present; `request` is not used.
static enum ood_result
parse_sp(struct request *request, struct buffer *buffer)
{
    (void)request;
    if (buffer_matches_string(buffer, " ")) return ood_okay;
    return ood_error;
}


static enum ood_result
parse_method(struct request *request, struct buffer *buffer)
{
    if (buffer_matches_string(buffer, "GET")) {
        request->request_line.method = method_get;
        return ood_okay;
    }
    if (buffer_matches_string(buffer, "POST")) {
        request->request_line.method = method_post;
        return ood_okay;
    }
    if (buffer_matches_string(buffer, "PUT")) {
        request->request_line.method = method_put;
        return ood_okay;
    }
    if (buffer_matches_string(buffer, "DELETE")) {
        request->request_line.method = method_delete;
        return ood_okay;
    }
    return ood_error;
}


// URI = "!" | "$"..";" | "=" | "?".."Z" | "_" | "a".."z" | "~"
// Returns 1 for a character allowed in a request URI, 0 otherwise.
static int
is_uri_char(int ch)
{
    // Contiguous ranges.
    if (ch >= '$' && ch <= ';') return 1;
    if (ch >= '?' && ch <= 'Z') return 1;
    if (ch >= 'a' && ch <= 'z') return 1;

    // Isolated characters.
    switch (ch) {
    case '!':
    case '=':
    case '_':
    case '~':
        return 1;
    default:
        return 0;
    }
}


// request_uri = URI+
// Stores a newly allocated copy of the URI token in the request line.
// Returns ood_error when no token could be read.
static enum ood_result
parse_request_uri(struct request *request, struct buffer *buffer)
{
    char *token = buffer_alloc_token_matching_ctype(buffer, is_uri_char);
    if (!token) return ood_error;

    request->request_line.request_uri = token;
    return ood_okay;
}


// http_version = "HTTP/1.1"
// Records the version in the request line; any other text is an error.
static enum ood_result
parse_http_version(struct request *request, struct buffer *buffer)
{
    if (!buffer_matches_string(buffer, "HTTP/1.1")) return ood_error;

    request->request_line.http_version = http_version_1_1;
    return ood_okay;
}


// request_line = method SP request_uri SP http_version CRLF
// The parts are parsed left to right and parsing stops at the first failure.
static enum ood_result
parse_request_line(struct request *request, struct buffer *buffer)
{
    bool const complete = parse_method(request, buffer)
                       && parse_sp(request, buffer)
                       && parse_request_uri(request, buffer)
                       && parse_sp(request, buffer)
                       && parse_http_version(request, buffer)
                       && parse_crlf(request, buffer);
    return complete ? ood_okay : ood_error;
}


// TOKEN = "!" | "#".."'" | "*".."." | "0".."9" | "A".."Z" | "^" | "_" | "`"
//       | "a".."z" | "|" | "~"
// Returns 1 for a character allowed in a header field name, 0 otherwise.
static int
is_token_char(int ch)
{
    // Contiguous ranges.
    if (ch >= '#' && ch <= '\'') return 1;
    if (ch >= '*' && ch <= '.') return 1;
    if (ch >= '0' && ch <= '9') return 1;
    if (ch >= 'A' && ch <= 'Z') return 1;
    if (ch >= 'a' && ch <= 'z') return 1;

    // Isolated characters.
    switch (ch) {
    case '!':
    case '^':
    case '_':
    case '`':
    case '|':
    case '~':
        return 1;
    default:
        return 0;
    }
}


// field_name = TOKEN+
// Reads a header name, wraps it in a new message_header and appends that
// header to the request's header array.
// Returns ood_error when no name token is present (the grammar requires at
// least one TOKEN character) or when an allocation or the append fails.
static enum ood_result
parse_field_name(struct request *request, struct buffer *buffer)
{
    char *field_name = buffer_alloc_token_matching_ctype(buffer, is_token_char);
    // Without a name there is no header to attach a value to. Reporting
    // success here would let a line such as ":x" append its value to the
    // previously parsed header.
    if (!field_name) return ood_error;

    struct message_header *message_header = message_header_alloc();
    if (!message_header) {
        free(field_name);
        return ood_error;
    }

    // From here on the header owns the name string.
    message_header->field_name = field_name;
    if (!ood_ptr_array_add(request->message_headers, message_header)) {
        message_header_free(message_header);
        return ood_error;
    }
    return ood_okay;
}


// HSP = SP | HT. Returns 1 for a space or horizontal tab, 0 otherwise.
static int
is_horizontal_space(int ch)
{
    switch (ch) {
    case ' ':
    case '\t':
        return 1;
    default:
        return 0;
    }
}


// message_sep = ":" | ":" HSP+
// Requires the colon; any horizontal whitespace after it is skipped.
static enum ood_result
parse_message_sep(struct request *request, struct buffer *buffer)
{
    (void)request;
    if (buffer_matches_string(buffer, ":")) {
        // Whitespace after the colon is optional, so the result is ignored.
        (void)buffer_skip_token_matching_ctype(buffer, is_horizontal_space);
        return ood_okay;
    }
    return ood_error;
}


// CONTENT = "!".."~". Returns 1 inside that range, 0 outside it.
static int
is_content(int ch)
{
    if (ch < '!') return 0;
    if (ch > '~') return 0;
    return 1;
}


// field_content = CONTENT+
// Reads one run of content characters and attaches it to the most recently
// added header: it becomes the header's content if there is none yet,
// otherwise it is appended to the existing content.
// Returns ood_error when no content is present, when there is no header to
// attach to, or when memory cannot be allocated. On error the header's
// existing content is left unchanged.
static enum ood_result
parse_field_content(struct request *request, struct buffer *buffer)
{
    char *field_content = buffer_alloc_token_matching_ctype(buffer, is_content);
    if (!field_content) return ood_error;

    struct message_header *message_header = (struct message_header *)ood_ptr_array_last_item(request->message_headers);
    if (!message_header) {
        // No header to attach to: fail instead of aborting on input data.
        free(field_content);
        return ood_error;
    }

    if (!message_header->field_content) {
        // First piece of content: the header takes ownership of the token.
        message_header->field_content = field_content;
        return ood_okay;
    }

    // Append using standard C only (no asprintf). If realloc fails, the old
    // block is still valid and still owned by the header.
    size_t const old_len = strlen(message_header->field_content);
    size_t const add_len = strlen(field_content);
    char *joined = realloc(message_header->field_content, old_len + add_len + 1);
    if (!joined) {
        free(field_content);
        return ood_error;
    }
    memcpy(joined + old_len, field_content, add_len + 1);
    message_header->field_content = joined;

    // The token has been copied; release it so it is not leaked.
    free(field_content);
    return ood_okay;
}


static enum ood_result
parse_field_value(struct request *request, struct buffer *buffer)
{
    if (!parse_field_content(request, buffer)) return ood_error;

    if (buffer_skip_token_matching_ctype(buffer, is_horizontal_space)) {
        struct message_header *message_header = (struct message_header *)ood_ptr_array_last_item(request->message_headers);
        assert(message_header);
        char *s = NULL;
        if (message_header->field_content) {
            int chars_formatted = asprintf(&s, "%s ", message_header->field_content);
            if (chars_formatted < 0) return ood_error;
            free(message_header->field_content);
        } else {
            int chars_formatted = asprintf(&s, "  ");
            if (chars_formatted < 0) return ood_error;
        }
        message_header->field_content = s;
    }

    parse_field_value(request, buffer);

    return ood_okay;
}


static enum ood_result
parse_message_end(struct request *request, struct buffer *buffer)
{
    buffer_skip_token_matching_ctype(buffer, is_horizontal_space);
    return parse_crlf(request, buffer);
}


// message_header = field_name message_sep message_end
//                | field_name message_sep field_value message_end
// The field value is optional, so its result is deliberately ignored.
// NOTE(review): nothing is rolled back on failure; input consumed and any
// header already appended by parse_field_name() remain in place.
static enum ood_result
parse_message_header(struct request *request, struct buffer *buffer)
{
    bool const has_prefix = parse_field_name(request, buffer)
                         && parse_message_sep(request, buffer);
    if (!has_prefix) return ood_error;

    (void)parse_field_value(request, buffer);

    return parse_message_end(request, buffer) ? ood_okay : ood_error;
}


// message_headers = CRLF
//                 | message_header CRLF
//                 | message_header message_headers
// Parses headers until one fails, then requires the blank line (CRLF) that
// ends the header section.
static enum ood_result
parse_message_headers(struct request *request, struct buffer *buffer)
{
    for (;;) {
        if (!parse_message_header(request, buffer)) break;
    }
    return parse_crlf(request, buffer);
}


// message_body = OCTET+
// Placeholder: no input is consumed and the request is not modified.
// Always returns ood_okay.
static enum ood_result
parse_message_body(struct request *request, struct buffer *buffer)
{
    
    return ood_okay;
}


// request = request_line message_headers
//         | request_line message_headers message_body
// Parses the request line and headers from `buffer` into `request`.
// Returns ood_error if either part fails. The result of the body parser is
// ignored.
enum ood_result
request_parse(struct request *request, struct buffer *buffer)
{
    bool const head_ok = parse_request_line(request, buffer)
                      && parse_message_headers(request, buffer);
    if (!head_ok) return ood_error;

    (void)parse_message_body(request, buffer);
    return ood_okay;
}

A src/dood/request.h => src/dood/request.h +197 -0
@@ 0,0 1,197 @@
#ifndef DOOD_REQUEST_H_INCLUDED
#define DOOD_REQUEST_H_INCLUDED


#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include "ood/result.h"


struct ood_ptr_array;


/*
  Request Grammar:

    request = request_line message_headers
            | request_line message_headers message_body

    request_line = method SP request_uri SP http_version CRLF

    method = "GET" | "POST" | "PUT" | "DELETE"

    request_uri = URI+

    http_version = "HTTP/1.1"

    message_headers = CRLF
                    | message_header CRLF
                    | message_header message_headers

    message_header = field_name message_sep message_end
                   | field_name message_sep field_value message_end

    message_sep = ":"
                | ":" HSP+

    message_end = CRLF
                | HSP+ CRLF

    field_name = TOKEN+

    field_value = field_content
                | field_content HSP field_value

    field_content = CONTENT+

    message_body = OCTET+

    CONTENT = "!".."~"
    CRLF = "\r\n"
    CTL = "\0".."\x1f" | "\x7f"
    HT = "\t"
    HSP = SP | HT
    OCTET = "\0".."\xff"
    SP = " "
    TOKEN = "!" | "#".."'" | "*".."." | "0".."9" | "A".."Z" | "^" | "_" | "`"
          | "a".."z" | "|" | "~"
    URI = "!" | "$"..";" | "=" | "?".."Z" | "_" | "a".."z" | "~"

  Unused Definitions:

    LWS = ( SP | HT )+
        | CRLF ( SP | HT )+
    SEPARATOR = HT | SP | "\"" | "(" | ")" | "," | "/" | ":" | ";" | "<"  | "="
              | ">" | "?" | "@" | "[" | "\" | "]" | "{" | "}"
    TEXT = "\t" | " ".."~"

 Unused URI Definitions:
    DIGIT = "0".."9"
    HEX = DIGIT | "A".."F" | "a".."f"
    LOWALPHA = "a".."z"
    UPALPHA = "A".."Z"
    MARK = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"
    RESERVED = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | ","

    alpha = LOWALPHA | UPALPHA
    alphanum = alpha | DIGIT

    unreserved = alphanum | MARK
    escaped = "%" HEX HEX

    uric = RESERVED | unreserved | escaped

  Excluded URI Definitions:
    CONTROL = "\0".."\x1f" | "\x7f"
    DELIMS = "\"" | "#" | "%" | "<" | ">"
    SPACE = " "
    UNWISE = "[" | "\" | "]" | "^" | "`" | "{" | "|" | "}"

    NON_URI = "\0".." " | "\"" | "#" | "<" | ">" | "[".."^" | "`" | "{".."}"
            | "\x7f"
*/


/* HTTP request methods recognised by the parser. method_unknown is 0, so a
   zero-initialised request starts out with no method. */
enum method {
    method_unknown = 0,
    method_get,
    method_post,
    method_put,
    method_delete,
};


/* HTTP versions recognised by the parser. http_version_unknown is 0, so a
   zero-initialised request starts out with no version. */
enum http_version {
    http_version_unknown = 0,
    http_version_1_1,
};


/* Character-class predicate with the same shape as the <ctype.h> functions:
   takes a character value and returns non-zero when it belongs to the class. */
typedef int (is_ctype_fn)(int ch);


/* Parse cursor over a character array supplied by the caller. */
struct buffer {
    /* Bytes being parsed; buffer_alloc() stores the pointer without copying. */
    char *data;
    /* Number of bytes available at `data`. */
    size_t size;
    /* Index of the next byte to be read. */
    int i;
    /* Saved index that buffer_goto_mark() rewinds to. */
    int mark;
};


/* Allocates a buffer over `size` bytes at `data`, with index and mark at 0.
   Returns NULL if allocation fails. */
struct buffer *
buffer_alloc(char *data, int size);

/* Frees the buffer struct; the underlying data is not freed. */
void
buffer_free(struct buffer *buffer);

/* Returns the next byte and advances the index, or -1 at the end of the data. */
int
buffer_getc(struct buffer *buffer);

/* Moves the index back to the mark. */
void
buffer_goto_mark(struct buffer *buffer);

/* Sets both the index and the mark to `i`. */
void
buffer_mark_at(struct buffer *buffer, int i);

/* Sets the mark to the current index. */
void
buffer_set_mark(struct buffer *buffer);

/* Consumes the non-empty literal `s` if it is next in the buffer. Returns
   true and advances index and mark on a match; otherwise rewinds to the mark
   and returns false. The index must be at the mark on entry. */
bool
buffer_matches_string(struct buffer *buffer, char const *s);

/* Returns the index just past the run of bytes accepted by `is_ctype`
   starting at the current index, without moving the buffer. */
int
buffer_last_index_of_ctype(struct buffer *buffer, is_ctype_fn is_ctype);

/* Skips the run of bytes accepted by `is_ctype`; returns true if any byte
   was skipped. */
bool
buffer_skip_token_matching_ctype(struct buffer *buffer, is_ctype_fn is_ctype);

/* Returns a malloc'd copy of the run of bytes accepted by `is_ctype` and
   advances past it, or NULL if the run is empty or allocation fails. The
   caller frees the result. */
char *
buffer_alloc_token_matching_ctype(struct buffer *buffer, is_ctype_fn is_ctype);


/* Parsed form of the first line of a request. */
struct request_line {
    enum method method;
    /* Heap-allocated string; released by request_free(). */
    char *request_uri;
    enum http_version http_version;
};


/* One header field. Both strings are heap-allocated and released by
   message_header_free(); either may be NULL. */
struct message_header {
    char *field_name;
    char *field_content;
};


/* Allocates a zero-initialised header; returns NULL if allocation fails. */
struct message_header *
message_header_alloc(void);


/* Frees a header and its two strings; NULL is accepted. */
void
message_header_free(struct message_header *message_header);


/* Request body storage. `octets` is released by request_free(); the parser
   in this revision never fills it in. */
struct message_body {
    void *octets;
    size_t size;
};


/* A parsed HTTP request: request line, header list and body. */
struct request {
    struct request_line request_line;
    /* Array of `struct message_header *`, in the order the headers were parsed. */
    struct ood_ptr_array *message_headers;
    struct message_body message_body;
};


/* Allocates an empty request with an empty header array; returns NULL if
   allocation fails. */
struct request *
request_alloc(void);

/* Frees a request and everything it owns; NULL is accepted. */
void
request_free(struct request *request);

/* Parses the request line and headers from `buffer` into `request`.
   Returns ood_okay on success and ood_error on a parse failure. */
enum ood_result
request_parse(struct request *request, struct buffer *buffer);


#endif

A src/dood/request_test.c => src/dood/request_test.c +285 -0
@@ 0,0 1,285 @@
#include <assert.h>
#include <ctype.h>
#include <stdlib.h>
#include <string.h>

#include "request.h"
#include "ood.h"


// A new buffer refers to the caller's data and starts with index and mark at 0.
static void
test_buffer_alloc(void)
{
    char data[] = "foo bar";
    // Exclude the terminating NUL from the parsed size.
    size_t data_size = sizeof data - 1;
    struct buffer *buffer = buffer_alloc(data, data_size);

    assert(data == buffer->data);
    assert(data_size == buffer->size);
    assert(0 == buffer->i);
    assert(0 == buffer->mark);

    buffer_free(buffer);
}


// buffer_getc() returns each byte in turn, advances only the index (never the
// mark), and returns -1 without advancing once the data is exhausted.
static void
test_buffer_getc(void)
{
    char data[] = "abc";
    size_t data_size = sizeof data - 1;
    struct buffer *buffer = buffer_alloc(data, data_size);

    assert('a' == buffer_getc(buffer));
    assert(1 == buffer->i);
    assert(0 == buffer->mark);

    assert('b' == buffer_getc(buffer));
    assert(2 == buffer->i);
    assert(0 == buffer->mark);

    assert('c' == buffer_getc(buffer));
    assert(3 == buffer->i);
    assert(0 == buffer->mark);

    // End of data: the index stays where it is.
    assert(-1 == buffer_getc(buffer));
    assert(3 == buffer->i);
    assert(0 == buffer->mark);

    buffer_free(buffer);
}


// buffer_set_mark() copies the current index into the mark, including when
// the index is at the end of the data.
static void
test_buffer_set_mark(void)
{
    char data[] = "abc";
    size_t data_size = sizeof data - 1;
    struct buffer *buffer = buffer_alloc(data, data_size);

    assert('a' == buffer_getc(buffer));
    assert('b' == buffer_getc(buffer));

    buffer_set_mark(buffer);
    assert(2 == buffer->i);
    assert(2 == buffer->mark);

    // Reading further moves the index but leaves the mark alone.
    assert('c' == buffer_getc(buffer));
    assert(3 == buffer->i);
    assert(2 == buffer->mark);

    assert(-1 == buffer_getc(buffer));
    assert(3 == buffer->i);
    assert(2 == buffer->mark);

    buffer_set_mark(buffer);
    assert(3 == buffer->i);
    assert(3 == buffer->mark);

    buffer_free(buffer);
}


// buffer_goto_mark() rewinds the index to the mark, both for the initial mark
// at 0 and for a mark set later.
static void
test_buffer_goto_mark(void)
{
    char data[] = "abc";
    size_t data_size = sizeof data - 1;
    struct buffer *buffer = buffer_alloc(data, data_size);

    assert('a' == buffer_getc(buffer));
    assert('b' == buffer_getc(buffer));

    buffer_goto_mark(buffer);

    assert(0 == buffer->i);
    assert(0 == buffer->mark);

    assert('a' == buffer_getc(buffer));
    assert('b' == buffer_getc(buffer));

    buffer_set_mark(buffer);

    // Read to the end, then rewind to the mark set at index 2.
    assert('c' == buffer_getc(buffer));
    assert(-1 == buffer_getc(buffer));

    buffer_goto_mark(buffer);

    assert(2 == buffer->i);
    assert(2 == buffer->mark);

    buffer_free(buffer);
}


// buffer_matches_string() consumes a matching literal and moves the mark past
// it; on a mismatch the index and mark are left where they were.
static void
test_buffer_matches_string(void)
{
    char data[] = "foo bar";
    size_t data_size = sizeof data - 1;
    struct buffer *buffer = buffer_alloc(data, data_size);

    assert(buffer_matches_string(buffer, "foo"));

    assert(3 == buffer->i);
    assert(3 == buffer->mark);

    // The next byte is a space, so "bar" does not match yet.
    assert(!buffer_matches_string(buffer, "bar"));

    assert(3 == buffer->i);
    assert(3 == buffer->mark);

    // Step over the space by hand and re-mark before matching again.
    assert(' ' == buffer_getc(buffer));
    buffer_set_mark(buffer);

    assert(buffer_matches_string(buffer, "bar"));

    assert(7 == buffer->i);
    assert(7 == buffer->mark);

    assert(-1 == buffer_getc(buffer));

    buffer_free(buffer);
}


// buffer_last_index_of_ctype() returns the index just past the matching run
// (or the current index when nothing matches) and never moves the buffer.
static void
test_buffer_last_index_of_ctype(void)
{
    char data[] = "123 abc";
    int data_size = sizeof data - 1;
    struct buffer *buffer = buffer_alloc(data, data_size);

    // No letters at index 0.
    int end = buffer_last_index_of_ctype(buffer, isalpha);
    assert(0 == end);
    assert(0 == buffer->i);
    assert(0 == buffer->mark);

    end = buffer_last_index_of_ctype(buffer, isdigit);
    assert(3 == end);
    assert(0 == buffer->i);
    assert(0 == buffer->mark);

    buffer_mark_at(buffer, 3);

    end = buffer_last_index_of_ctype(buffer, isspace);
    assert(4 == end);
    assert(3 == buffer->i);
    assert(3 == buffer->mark);

    buffer_mark_at(buffer, 4);

    // The run of letters extends to the end of the data.
    end = buffer_last_index_of_ctype(buffer, isalpha);
    assert(7 == end);
    assert(4 == buffer->i);
    assert(4 == buffer->mark);

    buffer_free(buffer);
}


// buffer_alloc_token_matching_ctype() returns NULL for an empty run and
// otherwise returns a copy of the run and advances past it, so successive
// calls walk through "123", " " and "abc".
static void
test_buffer_alloc_token_matching_ctype(void)
{
    char data[] = "123 abc";
    int data_size = sizeof data - 1;
    struct buffer *buffer = buffer_alloc(data, data_size);

    char *token = buffer_alloc_token_matching_ctype(buffer, isalpha);
    assert(NULL == token);

    token = buffer_alloc_token_matching_ctype(buffer, isdigit);
    assert(token);
    assert(0 == strcmp("123", token));
    free(token);

    token = buffer_alloc_token_matching_ctype(buffer, isspace);
    assert(token);
    assert(0 == strcmp(" ", token));
    free(token);

    token = buffer_alloc_token_matching_ctype(buffer, isalpha);
    assert(token);
    assert(0 == strcmp("abc", token));
    free(token);

    buffer_free(buffer);
}


// A new request has unknown method and version, no URI, an empty header
// array and no body.
static void
test_request_alloc(void)
{
    struct request *request = request_alloc();

    assert(request);

    assert(method_unknown == request->request_line.method);
    assert(NULL == request->request_line.request_uri);
    assert(http_version_unknown == request->request_line.http_version);

    assert(request->message_headers);
    assert(0 == request->message_headers->count);

    assert(!request->message_body.octets);
    assert(0 == request->message_body.size);

    request_free(request);
}


// Parses a complete GET request with three headers and checks the request
// line and each header's name and content, in order.
static void
test_request_parse_get(void)
{
    char data[] = "GET / HTTP/1.1\r\n"
                  "Host: localhost:8000\r\n"
                  "User-Agent: curl/7.80.0\r\n"
                  "Accept: */*\r\n"
                  "\r\n";
    size_t data_size = sizeof data - 1;
    struct buffer *buffer = buffer_alloc(data, data_size);
    struct request *request = request_alloc();

    enum ood_result result = request_parse(request, buffer);

    assert(ood_okay == result);
    assert(method_get == request->request_line.method);
    assert(0 == strcmp("/", request->request_line.request_uri));
    assert(http_version_1_1 == request->request_line.http_version);

    assert(3 == request->message_headers->count);

    // Walk the header array in insertion order.
    int i = 0;
    struct message_header *message_header = NULL;

    assert(ood_ptr_array_next(request->message_headers, &i, (void const **)&message_header));
    assert(0 == strcmp("Host", message_header->field_name));
    assert(0 == strcmp("localhost:8000", message_header->field_content));

    assert(ood_ptr_array_next(request->message_headers, &i, (void const **)&message_header));
    assert(0 == strcmp("User-Agent", message_header->field_name));
    assert(0 == strcmp("curl/7.80.0", message_header->field_content));

    assert(ood_ptr_array_next(request->message_headers, &i, (void const **)&message_header));
    assert(0 == strcmp("Accept", message_header->field_name));
    assert(0 == strcmp("*/*", message_header->field_content));

    request_free(request);
    buffer_free(buffer);
}


// Runs every buffer and request test in this file. Failures are reported
// through assert().
void
request_test(void)
{
    test_buffer_alloc();
    test_buffer_getc();
    test_buffer_set_mark();
    test_buffer_goto_mark();
    test_buffer_matches_string();
    test_buffer_last_index_of_ctype();
    test_buffer_alloc_token_matching_ctype();

    test_request_alloc();
    test_request_parse_get();
}

M src/libood/ood/ptr_array.c => src/libood/ood/ptr_array.c +10 -0
@@ 75,3 75,13 @@ ood_ptr_array_next(struct ood_ptr_array const *array, int *i, void const **item_
    ++*i;
    return ood_okay;
}


// Returns the non-NULL item stored at the highest index of the array, or
// NULL when every slot up to `capacity` is NULL.
void const *
ood_ptr_array_last_item(struct ood_ptr_array const *array)
{
    int slot = array->capacity;
    while (slot-- > 0) {
        void const *candidate = array->items[slot];
        if (candidate) return candidate;
    }
    return NULL;
}

M src/libood/ood/ptr_array.h => src/libood/ood/ptr_array.h +3 -0
@@ 31,5 31,8 @@ ood_ptr_array_clear(struct ood_ptr_array *array, void const *item);
enum ood_result
ood_ptr_array_next(struct ood_ptr_array const *array, int *i, void const **item_out);

// Returns the non-NULL item at the highest index, or NULL if there is none.
void const *
ood_ptr_array_last_item(struct ood_ptr_array const *array);


#endif