/* Error codes returned by utf8_decode_step() in place of a codepoint. Both
* are negative, so they cannot be mistaken for a decoded codepoint.
* The replacement lists are parenthesised so that each macro expands as a
* single operand wherever it is used. */
/* NOTE(review): presumably the bytes do not form a valid UTF-8 sequence --
* confirm against the decoder. */
#define UTF8_STEP_MALFORMED (-1)
/* NOTE(review): presumably the input ends before the sequence is complete --
* confirm against the decoder. */
#define UTF8_STEP_INCOMPLETE (-2)
/* Try to decode the UTF-8 sequence pointed to by `*buf` while incrementing it
* to point to after the decoded sequence. Returns either the
* corresponding codepoint if everything went fine, or one of the above defined
* errors.
*
* buf: the pointer to the string starting with a UTF-8 sequence. It'll be
* modified to point to the char after the decoded sequence.
*
* Return: the corresponding codepoint if everything went fine, or one of the
* above defined errors otherwise.
*/
int64_t utf8_decode_step(const char **buf);
/* Same as utf8_decode_step(), but aborts when encountering an error */
static inline uint32_t xutf8_decode_step(const char **buf)
const int64_t ret = utf8_decode_step(buf);
/* Encode the codepoint `codep` as UTF-8 into `out`.
*
* codep: the codepoint to encode
* out: the destination buffer receiving the encoded bytes
*
* Return: the number of bytes used in `out`.
*
* NOTE(review): the capacity `out` must provide, and what happens for a
* codepoint that cannot be encoded, are not stated here -- confirm against
* the implementation. */
uint8_t utf8_encode(uint32_t codep, char *out);
/* Get the number of bytes in a UTF-8 sequence from its first byte.
*
* firstbyte: the first byte of the sequence
*
* Return: the length of the sequence in bytes, or 0 if a continuation byte
* was passed instead of a leading byte. */
uint8_t utf8_codep_len(char firstbyte);
/* Step one codepoint forward.
*
* buf: pointer to the current position; `*buf` is assumed to be on the
* beginning of a codepoint and is updated in place.
*
* Does not check for the end of the string: that check is considered
* unnecessary when buf is a well-encoded UTF-8 string. */
void utf8_step(const char **buf);
/* Step back so that `*buf` points to the previous UTF-8 sequence.
*
* buf: pointer to the current position; updated in place.
*
* Does not check for the beginning of the buffer: handling that special case
* is the caller's responsibility. */
void utf8_unstep(const char **buf);
/* Return the lowercase version of titlecase and uppercase codepoints, using
* a lookup table (LUT).
*
* c: the codepoint to convert
*
* Return: the lowercase counterpart of `c`.
*
* NOTE(review): the result for codepoints that are neither uppercase nor
* titlecase is not stated here (presumably `c` is returned unchanged) --
* confirm against the implementation. */
uint32_t codep_tolower(uint32_t c);
/* The following function defines the column width of an ISO 10646
* character as follows:
* - The null character (U+0000) has a column width of 0.
* - Other C0/C1 control characters and DEL will lead to a return
* value of -1.
* - Non-spacing and enclosing combining characters (general
* category code Mn or Me in the Unicode database) have a
* column width of 0.
* - SOFT HYPHEN (U+00AD) has a column width of 1.
* - Other format characters (general category code Cf in the Unicode
* database) and ZERO WIDTH SPACE (U+200B) have a column width of 0.
* - Hangul Jamo medial vowels and final consonants (U+1160-U+11FF)
* have a column width of 0.
* - Spacing characters in the East Asian Wide (W) or East Asian
* Full-width (F) category as defined in Unicode Technical
* Report #11 have a column width of 2.
* - All remaining characters (including all printable
* ISO 8859-1 and WGL4 characters, Unicode control characters,
* etc.) have a column width of 1.
* This implementation assumes that uint32_t characters are encoded
* in ISO 10646.
*/
int codep_width(uint32_t ch);