~pixelherodev/DSM-9

ea72bfd7d94dd5efbb18c7e9fb3abe3a6fa09970 — Noam Preil a day ago e01de2e
tmp
6 files changed, 11 insertions(+), 452 deletions(-)

M Makefile
M dsm9.h
M due.c
M mkfile
D sm2.c
D sm2.h
M Makefile => Makefile +3 -3
@@ 6,13 6,13 @@ CFLAGS=-std=c17 -pedantic -Wall -O1 -march=native -g -D _POSIX_C_SOURCE=200809 -

all:due log

due: sm2.o due.o time.o util.o
due: due.o time.o util.o dsm9.o
	$(CC) -o $@ $^ -lrt $(CFLAGS)

log: sm2.o log.o time.o util.o
log: log.o time.o util.o dsm9.o
	$(CC) -o $@ $^ -lrt $(CFLAGS)

%.o: %.c sm2.h dsm9.h sb.h
%.o: %.c dsm9.h sb.h
	$(CC) $(CFLAGS) $(EXTRA_CFLAGS) -c $< -o $@

clean:

M dsm9.h => dsm9.h +4 -4
@@ 47,14 47,14 @@ int dsm9load(char *basepath, dsm9deck *deck);
void dsm9unload(dsm9deck*);

/* Fetches the name of a card, by index */
char *dsm9cardname(dsm9deck*, uint32_t);
char *dsm9cardname(dsm9deck*, uint16_t);
/* Fetches the front of a card, by index */
char *dsm9cardfront(dsm9deck*, uint32_t);
char *dsm9cardfront(dsm9deck*, uint16_t);
/* Fetches the back of a card, by index */
char *dsm9cardback(dsm9deck*, uint32_t);
char *dsm9cardback(dsm9deck*, uint16_t);
/*
	Appends a log entry to the log file, updates the card information, and flushes the card information to the DSM accelerator file.
*/
int dsm9log(dsm9deck*, uint32_t index, uint8_t grade, uint32_t timestamp);
int dsm9log(dsm9deck*, uint16_t index, uint8_t grade, uint32_t timestamp);



M due.c => due.c +0 -1
@@ 10,7 10,6 @@
#endif
#include <stdio.h>

#include "sm2.h"
#include "dsm9.h"
#include "util.h"


M mkfile => mkfile +4 -4
@@ 4,19 4,19 @@ CFLAGS=$CFLAGS -p -D_POSIX_SOURCE -D_PLAN9_SOURCE -D_PLAN9_$objtype -D_PLAN9_NOA

all:V: due log cli

cli: cli.$O sm2.$O time.$O util.$O
cli: cli.$O time.$O util.$O dsm9.$O
	$LD $LDFLAGS -o $target $prereq

due: due.$O sm2.$O time.$O util.$O
due: due.$O time.$O util.$O dsm9.$O
	$LD $LDFLAGS -o $target $prereq

log: log.$O sm2.$O time.$O util.$O
log: log.$O time.$O util.$O dsm9.$O
	$LD $LDFLAGS -o $target $prereq

clean:
	rm -f due log cli *.$O

%.$O:	%.c sb.h dsm9.h sm2.h
%.$O:	%.c sb.h dsm9.h
	$CC $CFLAGS $stem.c

install:V: /$objtype/bin/dsm9/due /$objtype/bin/dsm9/log /$objtype/bin/dsm9/cli

D sm2.c => sm2.c +0 -393
@@ 1,393 0,0 @@
#ifdef _PLAN9_SOURCE
#include <u.h>
#include <libc.h>
#include "9compat.h"
#else

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#endif

#include <stdio.h>

/* v2 header */
#include "dsm9.h"

#include "util.h"
#include "sb.h"

/*
	Releases every resource owned by the deck and resets the structure to all zeroes.
	The deck text, all per-card arrays, both accelerator lists and the log stream are released; the dsm9deck object itself is not freed.
*/
void
dsm9unload(dsm9deck *deck)
{
	free(deck->deckbuf);
	/* Per-card arrays, all sized with sbexactcapacity() in initids()/initcards() */
	sbfree(deck->ids);
	sbfree(deck->fronts);
	sbfree(deck->backs);
	sbfree(deck->reps);
	sbfree(deck->dues);
	sbfree(deck->easinesses);
	sbfree(deck->intervals);
	/* Accelerator lists, both grown with pushu16() */
	sbfree(deck->accel.new);
	sbfree(deck->accel.due);
	if(deck->logs != NULL)
		fclose(deck->logs);
	/* Zeroing leaves the deck in the same state dsm9load() expects to start from */
	memset(deck, 0, sizeof(*deck));
}

/*
	Applies a single review to the in-memory scheduling state of card `index`.
	grade is the review grade, timestamp the time of the review in seconds.
	A grade below 3 makes the card due immediately and resets its interval to 1; otherwise the easiness is updated (never dropping below 1.3), the next due time is computed from the previous interval, and the interval is advanced.
	Cards that are already due are appended to deck->accel.due.
*/
static void
dolog(dsm9deck *deck, uint32_t index, uint8_t grade, uint32_t timestamp)
{
	double e = deck->easinesses[index];
	double i = deck->intervals[index];
	double ts = timestamp;
	double g = grade;
	deck->reps[index] += 1;
	if(grade < 3){
		deck->dues[index] = 0;
		deck->intervals[index] = 1;
		printf("Already due: %u\n", (unsigned)index);
		pushu16(&deck->accel.due, index);
		return;
	}
	e = e - 0.8 + (0.28 * g) - (0.02 * g * g);
	if(e < 1.3)
		e = 1.3;
	/* Store the updated easiness; previously the floor value 1.3 was written unconditionally, discarding the computation above */
	deck->easinesses[index] = e;
	/* Interval is in days; dues are in seconds */
	deck->dues[index] = (uint32_t)(ts + ((double)60 * (double)60 * (double)24 * i * e));
	if(deck->dues[index] < time(NULL)){
		printf("Already due: %u\n", (unsigned)index);
		pushu16(&deck->accel.due, index);
	}
	/* NOTE(review): intervals are allocated one byte per card in initcards(); a large easiness * interval product may not fit - confirm the element type and clamp if needed */
	deck->intervals[index] = deck->reps[index] == 1 ? 3 : deck->reps[index] == 2 ? 6 : deck->easinesses[index] * (double)deck->intervals[index];
}

/*
	Ensures deck->logs is an open stream on "<basepath>.log", opened in append mode.
	Returns 1 when the stream is available (already open, or opened by this call) and 0 when aprintf() or fopen() fails.
*/
static int
openlogs(dsm9deck *deck)
{
	char *logpath;
	if(deck->logs == NULL){
		logpath = aprintf("%s.log", deck->basepath);
		if(logpath == NULL)
			return 0;
		deck->logs = fopen(logpath, "a");
		/* The path is only needed for the fopen call itself */
		free(logpath);
		if(deck->logs == NULL){
			fprintf(stderr, "Failed to open log for %s", deck->basepath);
			return 0;
		}
	}
	return 1;
}

/*
	Records a review of card `index`: updates the in-memory scheduling state via dolog() and appends a "name@@@timestamp@@@grade" line to the deck's log file, flushing it immediately.
	Returns 1 on success; 0 if the card name cannot be fetched, the log cannot be opened, or the entry cannot be written.
*/
int
dsm9log(dsm9deck *deck, uint32_t index, uint8_t grade, uint32_t timestamp)
{
	int ok;
	char *id = dsm9cardname(deck, index);
	if(id == NULL)
		return 0;
	dolog(deck,index,grade,timestamp);
	if(!openlogs(deck)){
		/* id is heap-allocated by dsm9cardname(); release it on this path too */
		free(id);
		return 0;
	}
	ok = fprintf(deck->logs, "%s@@@%u@@@%u\n", id, (unsigned)timestamp, (unsigned)grade) >= 0;
	if(fflush(deck->logs) != 0)
		ok = 0;
	free(id);
	return ok;
}

/*
	Writes id into slot *index of the array *ids and advances *index by one.
	Returns 1 on success. Once *index has reached 0xFFFF nothing is written and 0 is returned.
*/
static int
fastidpush(uint32_t **ids, uint32_t id, uint16_t *index)
{
	uint16_t slot = *index;
	if(slot != 0xFFFF){
		(*ids)[slot] = id;
		*index = slot + 1;
		return 1;
	}
	return 0;
}

/*
	Scans the deck text once and records, for every line, the offset at which it starts (deck->ids), then sets deck->cardnum to the number of lines found.
	Returns 1 on success, 0 if the id array cannot be allocated or the deck has more lines than fastidpush() accepts.
*/
static int
initids(dsm9deck *deck)
{
	/* A single byte-wise pass is used instead of strtok plus per-line field parsing: lines can be arbitrarily large (the mean in the test deck is over a kilobyte) and only the line offsets are needed here, so re-reading each line would just waste cache. */
	uint32_t linestart = 0;
	uint16_t count = 0;
	char *p;
	if(!sbexactcapacity(&deck->ids, 0xFFFF, 4))
		return 0;
	p = deck->deckbuf;
	while(*p != 0){
		if(*p == '\n'){
			/* The line that just ended began at linestart; the next one begins after this newline */
			if(!fastidpush(&deck->ids, linestart, &count))
				return 0;
			linestart = p - deck->deckbuf + 1;
		}
		p += 1;
	}
	/* A deck that doesn't end with a newline still has one trailing card to record */
	if(linestart != deck->decklen && !fastidpush(&deck->ids, linestart, &count))
		return 0;
	sbincrement(deck->ids, count);
	deck->cardnum = count;
	return 1;
}

/*
	Indexes the deck with initids() and allocates the per-card arrays with their initial values: fronts, backs, reps and dues zeroed, every interval byte set to 1, every easiness set to 2.5.
	Returns 1 on success, 0 if indexing or any allocation fails.
*/
static int
initcards(dsm9deck *deck)
{
	int card;
	if(!initids(deck))
		return 0;
	/* Resizing ids is optional, but it hands back the memory initids() reserved beyond cardnum entries. */
	if(!sbexactcapacity(&deck->ids, deck->cardnum, 4)
	|| !sbexactcapacity(&deck->fronts, deck->cardnum, 4)
	|| !sbexactcapacity(&deck->backs, deck->cardnum, 4)
	|| !sbexactcapacity(&deck->reps, deck->cardnum, 2)
	|| !sbexactcapacity(&deck->dues, deck->cardnum, 4)
	|| !sbexactcapacity(&deck->easinesses, deck->cardnum, sizeof(float))
	|| !sbexactcapacity(&deck->intervals, deck->cardnum, 1))
		return 0;
	memset(deck->intervals, 1, deck->cardnum);
	memset(deck->dues, 0, deck->cardnum * 4);
	memset(deck->reps, 0, deck->cardnum * 2);
	memset(deck->backs, 0, deck->cardnum * 4);
	memset(deck->fronts, 0, deck->cardnum * 4);
	card = 0;
	while(card < deck->cardnum){
		deck->easinesses[card] = 2.5;
		card += 1;
	}
	/* Cards are deliberately not marked new here: that would mean extra bookkeeping while logs are replayed. New cards are found afterwards by a pass over `reps`, which is linear and needs no indirection. */
	return 1;
}

/*
	Looks up a card by name: returns the index of the first card whose line starts with "<name>@@@".
	Returns (uint32_t)-1 if no card matches or the search string cannot be built.
*/
static uint32_t
find(dsm9deck *deck, char *name)
{
	char *withdelim = aprintf("%s@@@", name);
	size_t len;
	if(withdelim == NULL)
		return (uint32_t)-1;
	/* The search string never changes, so measure it once rather than once per card */
	len = strlen(withdelim);
	for(uint32_t i = 0; i < deck->cardnum; i += 1){
		if(strncmp(withdelim, deck->deckbuf + deck->ids[i], len) == 0){
			free(withdelim);
			return i;
		}
	}
	printf("Unable to find card '%s', search str '%s'\n", name,withdelim);
	free(withdelim);
	return (uint32_t)-1;
}

/*
	Replays "<basepath>.log" onto the deck.
	Log entries have three fields separated by "@@@": id, timestamp, grade (a single digit from 0 to 5). Entries naming a card that is not in the deck are reported and skipped.
	Returns 0 on failure to process logs, 1 on success, and 2 if logs can't be loaded.
*/
static int
logsload(dsm9deck *deck, char *basepath)
{
	char *buf, *logpath, *line, *delim, *id, *timestamp;
	uint32_t count = 0, l = 0, index;
	uint8_t grade;
	size_t size;
	double dt;
	int ret = 0;
	/* Restart the timer used for the summary line at the end */
	delta();
	logpath = aprintf("%s.log", basepath);
	if(logpath == NULL)
		return 0;
	buf = readfile(logpath, &size);
	free(logpath);
	if(buf == NULL){
		fprintf(stderr, "Unable to read logs for '%s', continuing without...\n", basepath);
		return 2;
	}
	for(line = strtok(buf, "\n"); line != NULL; line = strtok(NULL, "\n")){
		l += 1;
		delim = strstr(line, "@@@");
		if(delim == NULL){
			fprintf(stderr, "expected three fields on line %d, found one\n", (int)l);
			goto done;
		}
		/* strtok already owns and mutates buf, so fields are terminated in place instead of being copied */
		*delim = 0;
		id = line;
		timestamp = delim + 3;
		delim = strstr(timestamp, "@@@");
		if(delim == NULL){
			fprintf(stderr, "expected three fields on line %d, found two", (int)l);
			goto done;
		}
		*delim = 0;
		if(strlen(delim + 3) != 1){
			fprintf(stderr, "grade must be a single digit");
			goto done;
		}
		grade = *(delim + 3) - '0';
		if(grade > 5){
			fprintf(stderr,"grade must be a single digit between 0 and 5 (inclusive)");
			goto done;
		}
		index = find(deck, id);
		if(index == (uint32_t)-1){
			printf("card '%s' unknown, ignoring log entry...\n", id);
		} else{
			dolog(deck, index, grade, atol(timestamp));
			count += 1;
		}
	}
	dt = delta();
	printf("Loaded %d log entries in %fs\n", (int)count, dt);
	ret = 1;
done:
	/* The file contents were never released before, on any path */
	free(buf);
	return ret;
}

/*
	Placeholder for loading the accelerator file "<basepath>.dsm".
	The file is read but not interpreted yet, so this always returns 0 and the caller falls back to replaying the logs.
*/
static int
accelload(dsm9deck *deck, char *basepath)
{
	char *dsmpath = aprintf("%s.dsm", basepath);
	size_t size;
	char *buf;
	if(dsmpath == NULL)
		return 0;
	buf = readfile(dsmpath, &size);
	/* The path is no longer needed whether or not the read succeeded */
	free(dsmpath);
	if(buf == NULL)
		return 0;
	fprintf(stderr, "TODO: load accelerator\n");
	/* Nothing consumes the contents yet; release them rather than leak */
	free(buf);
	(void)deck;
	return 0;
}

/*
	Locates the front and back fields of card `index` by scanning its line for the first two "@@@" delimiters, and caches their start offsets in deck->fronts[index] and deck->backs[index].
	Returns 1 on success. If the line is too short or has fewer than two delimiters, both offsets are set to (uint32_t)-1 - the value the dsm9card* getters test for - and 0 is returned.
*/
static int
dsm9stream(dsm9deck *deck, uint32_t index)
{
	uint32_t id, end, front = 0, back = 0;
	int field = 0;
	/* For this, it'd be more efficient to have the whole card stored together - however, this is intended to be called rarely, and uses only a tiny portion of the cache, so it doesn't matter */
	id = deck->ids[index];
	end = index + 1 != deck->cardnum ? deck->ids[index + 1] : deck->decklen;
	if(id + 3 < end){
		/* Only the first two delimiters matter, so stop as soon as both are found */
		for(uint32_t cindex = id; cindex < end - 3 && field < 2; cindex += 1){
			if(deck->deckbuf[cindex] == '@' && deck->deckbuf[cindex + 1] == '@' && deck->deckbuf[cindex + 2] == '@'){
				if(field == 0)
					front = cindex + 3;
				else
					back = cindex + 3;
				field += 1;
			}
		}
	}
	if(field < 2){
		/* Record the failure in the deck. Previously the marker was assigned to a local only, so every later access rescanned the malformed line. */
		deck->fronts[index] = (uint32_t)-1;
		deck->backs[index] = (uint32_t)-1;
		return 0;
	}
	deck->fronts[index] = front;
	deck->backs[index] = back;
	return 1;
}

/*
	Fetches the back of a card, by index.
	Returns a freshly malloc'd, NUL-terminated copy that the caller must free, or NULL if the card is invalid, cannot be streamed, or memory is exhausted.
*/
char *
dsm9cardback(dsm9deck *deck, uint32_t index)
{
	uint32_t start = deck->backs[index];
	uint32_t end, size;
	char *buf;
	/* If card is invalid, or if the card is new and we cannot stream it, return NULL */
	if(start == (uint32_t)-1 || (start == 0 && !dsm9stream(deck, index)))
		return NULL;
	start = deck->backs[index];
	/* The back runs to the start of the next card, or to the end of the deck for the last card. The previous test was inverted, reading ids[cardnum] for the last card and using decklen for all others. */
	end = index + 1 != deck->cardnum ? deck->ids[index + 1] : deck->decklen;
	if(end < start)
		return NULL;
	/* unlike name/front, back ends either at line ending or EOF; the line ending is not part of the back */
	if(end > start && deck->deckbuf[end - 1] == '\n')
		end -= 1;
	size = end - start;
	buf = malloc(size + 1);
	if(buf == NULL)
		return NULL;
	memcpy(buf, deck->deckbuf + start, size);
	buf[size] = 0;
	return buf;
}


/*
	Fetches the front of a card, by index.
	Returns a freshly malloc'd, NUL-terminated copy that the caller must free, or NULL if the card is invalid, cannot be streamed, or memory is exhausted.
*/
char *
dsm9cardfront(dsm9deck *deck, uint32_t index)
{
	uint32_t start = deck->fronts[index];
	uint32_t end = deck->backs[index];
	uint32_t size;
	char *buf;
	/* If card is invalid, or if the card is new and we cannot stream it, return NULL */
	if(end == (uint32_t)-1 || (start == 0 && !dsm9stream(deck, index)))
		return NULL;
	start = deck->fronts[index];
	end = deck->backs[index];
	/* The front stops where the three-byte delimiter before the back begins; reject offsets that would make the size wrap around */
	if(end <= start || end - start < 3)
		return NULL;
	size = end - start - 3;
	buf = malloc(size + 1);
	if(buf == NULL)
		return NULL;
	memcpy(buf, deck->deckbuf + start, size);
	buf[size] = 0;
	return buf;
}

/*
	Fetches the name of a card, by index.
	Returns a freshly malloc'd, NUL-terminated copy that the caller must free, or NULL if the card is invalid, cannot be streamed, or memory is exhausted.
*/
char *
dsm9cardname(dsm9deck *deck, uint32_t index)
{
	uint32_t id = deck->ids[index];
	uint32_t end = deck->fronts[index];
	uint32_t size;
	char *buf;
	/* If card is invalid, or if the card is new and we cannot stream it, return NULL */
	if(end == (uint32_t)-1 || (end == 0 && !dsm9stream(deck, index)))
		return NULL;
	end = deck->fronts[index];
	/* The name stops where the three-byte delimiter before the front begins; reject offsets that would make the size wrap around */
	if(end < id || end - id < 3)
		return NULL;
	size = end - id - 3;
	buf = malloc(size + 1);
	if(buf == NULL)
		return NULL;
	memcpy(buf, deck->deckbuf + id, size);
	buf[size] = 0;
	return buf;
}

/*
	Loads the deck "<basepath>.deck" into `deck`, replacing whatever it held before.
	After indexing the cards it tries the accelerator file and, failing that, replays the logs; finally the list of new (never reviewed) cards is built.
	basepath is stored in the deck without being copied, so it must outlive the deck.
	Returns 1 on success and 0 on failure. On failure the deck may be partially populated; dsm9unload() can be called on it.
*/
int
dsm9load(char *basepath, dsm9deck *deck)
{
	char *deckpath = aprintf("%s.deck", basepath);
	if(deckpath == NULL)
		return 0;
	dsm9unload(deck);
	deck->deckbuf = readfile(deckpath, &deck->decklen);
	deck->basepath = basepath;
	free(deckpath);
	if(deck->deckbuf == NULL){
		fprintf(stderr, "Unable to read deck for '%s'\n", basepath);
		return 0;
	}
	/* Everything below indexes the per-card arrays, so a failed allocation here must stop the load */
	if(!initcards(deck))
		return 0;
	if(!accelload(deck, basepath))
		switch(logsload(deck, basepath)){
		case 0:
			return 0;
		case 1:
			break;
		case 2:
			/* No logs at all: every card is new */
			if(!sbexactcapacity(&deck->accel.new, deck->cardnum, 4))
				return 0;
			/* Not exactly the fastest method, but the card count is at most 64k and the access is linear, so this should be measured in microseconds. */
			for(uint16_t i = 0; i < deck->cardnum; i += 1)
				deck->accel.new[i] = i;
			deck->accel.newcount = deck->cardnum;
			return 1;
		}
	if(!sbexactcapacity(&deck->accel.new, deck->cardnum, 4))
		return 0;
	/* A card with no recorded repetitions is new */
	for(uint16_t i = 0; i < deck->cardnum; i += 1)
		if(deck->reps[i] == 0)
			if(!pushu16(&deck->accel.new, i))
				return 0;
	deck->accel.newcount = sbcount(deck->accel.new);
	deck->accel.duecount = sbcount(deck->accel.due);
	return 1;
}


D sm2.h => sm2.h +0 -47
@@ 1,47 0,0 @@
/*
	One card tracked by an SM-2 instance.
	NOTE(review): the field descriptions below are inferred from names and types only; the code that fills them is not part of this header - confirm against sm2.c.
*/
struct sm2_card {
	/* presumably the identifier that log entries refer to - confirm */
	char *id;
	/* presumably the path of the deck the card came from - confirm */
	char *path;
	/* array of strings; presumably field_count entries long - confirm */
	char **fields;
	unsigned long field_count;
	/* presumably the time at which the card is next due - units not visible here */
	unsigned long next_due;
	uint8_t interval;
	uint32_t repetitions;
	float easiness;
};

/*
	State of one SM-2 instance: the set of loaded cards and the delimiter string.
*/
struct sm2_instance {
	/* NOTE(review): card_cap looks redundant given the comment on `cards` below - confirm whether it is still used */
	uint32_t card_count, card_cap;
	/* Heap-allocated array of size card_count. Capacity is always equal to size. */
	struct sm2_card *cards;
	/* presumably the separator between card fields - confirm */
	char *delimiter;
};

/*
	Call this before calling any other sm2_* functions.
*/
extern void sm2_init(struct sm2_instance*);

/*
	Loads a deck into the instance. If overwrite is set, any cards with IDs
	matching already-loaded cards will replace the previously known cards.
	If overwrite is not set, then cards with duplicate IDs will be treated as distinct copies of the same card. In other words, log entries for one are considered log entries for BOTH. Behavior is not fully defined for this scenario.
	
	This *will* crash (or worse!) if the instance hasn't been initialized.
	This overwrites the provided string with undefined contents. The caller
	is responsible for duplicating the string if this is unwanted.
*/
extern int sm2_load_deck(struct sm2_instance*, char *, int overwrite, char *path);

/*
	Loads and processes logs. While logs can be loaded before decks, any cards not already
	loaded will be ignored entirely. This allows for deleting cards from a deck without touching
	the logs.
*/
extern int sm2_load_logs(struct sm2_instance*, char*);

/*
	Deallocate all resources used by the SM-2 instance
*/
extern void sm2_deinit(struct sm2_instance*);

/*
	Takes a card id, a grade and a timestamp for the given instance.
	NOTE(review): presumably records one review of that card; the exact effect and the meaning of the return value are not documented in this header - confirm against the implementation.
*/
extern int sm2_log(struct sm2_instance *instance, char *id, uint8_t grade, long timestamp);