From db90058840223f15d7460086fd4b81a7aa3c886b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=A1=D1=82=D1=80=D0=B0=D1=85=D0=B8=D1=9A=D0=B0=20=D0=A0?= =?UTF-8?q?=D0=B0=D0=B4=D0=B8=D1=9B?= Date: Sat, 6 Jul 2024 12:23:21 +0000 Subject: [PATCH] utf8.[ch]: *_char_* -> *_rune_*; (u32_to_u8,u32_strlen): Add max --- utf8.c | 20 ++++++++++---------- utf8.h | 13 ++++++++----- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/utf8.c b/utf8.c index af1215f..aff8559 100644 --- a/utf8.c +++ b/utf8.c @@ -42,7 +42,7 @@ const int utf_length_table[256] = { const int extract_masks[6] = {XMASK1, XMASK2, XMASK3, XMASK4, XMASK5, XMASK6}; int -u8_char_to_u32(u32* to, const u8* from, size_t* from_delta) +u8_rune_to_u32(u32* to, const u8* from, size_t* from_delta) { const u8* pfrom = from; if (!from) @@ -64,7 +64,7 @@ u8_char_to_u32(u32* to, const u8* from, size_t* from_delta) } int -u32_char_to_u8(u8* to, const u32 from) +u32_rune_to_u8(u8* to, const u32 from) { u32 cfrom = from; u8 start = 0; @@ -109,7 +109,7 @@ u32_char_to_u8(u8* to, const u32 from) } int -u8_to_u32(u32* to, const u8* from, size_t* from_delta) +u8_to_u32(u32* to, const u8* from, const ssize_t max, size_t* from_delta) { const u8* pfrom = from; u32* pto = to; @@ -117,9 +117,9 @@ u8_to_u32(u32* to, const u8* from, size_t* from_delta) return 1; size_t delta = 0; *from_delta = 0; - while (*pfrom) + while (pfrom - from < max && *pfrom) { - int result = u8_char_to_u32(pto, pfrom, &delta); + int result = u8_rune_to_u32(pto, pfrom, &delta); if (result) return result; pto++; @@ -131,16 +131,16 @@ u8_to_u32(u32* to, const u8* from, size_t* from_delta) } int -u32_to_u8(u8* to, const u32* from) +u32_to_u8(u8* to, const u32* from, const ssize_t max) { const u32* pfrom = from; u8* pto = to; int len = 0; if (!from) return 0; - while (*pfrom) + while (pfrom - from < max && *pfrom) { - size_t delta = u32_char_to_u8(pto, *pfrom); + size_t delta = u32_rune_to_u8(pto, *pfrom); pto += delta; len += delta; pfrom++; @@ -149,10 +149,10 @@ u32_to_u8(u8* to, const u32* from) } size_t -u32_strlen(const u32* s) +u32_strlen(const u32* s, const ssize_t max) { const u32* ps = s; - while (ps && *ps) + while (ps && ps - s < max && *ps) ps++; return ps - s; } diff --git a/utf8.h b/utf8.h index a3a08ce..8691a66 100644 --- a/utf8.h +++ b/utf8.h @@ -5,6 +5,9 @@ #include #include +/* maximum chars for UTF-8 representations of Unicode chars, per Unicode char */ +#define UTF8REPMAX 6 + #define XMASK1 0x7F /* b01111111 */ #define XMASK2 0x1F /* b00011111 */ #define XMASK3 0x0F /* b00001111 */ @@ -34,8 +37,8 @@ typedef uint8_t u8; typedef uint32_t u32; -int u8_char_to_u32(u32* to, const u8* from, size_t* from_delta); -int u32_char_to_u8(u8* to, const u32 from); -int u8_to_u32(u32* to, const u8* from, size_t* from_delta); -int u32_to_u8(u8* to, const u32* from); -size_t u32_strlen(const u32* s); +int u8_rune_to_u32(u32* to, const u8* from, size_t* from_delta); +int u32_rune_to_u8(u8* to, const u32 from); /* max == 1 */ +int u8_to_u32(u32* to, const u8* from, const ssize_t max, size_t* from_delta); +int u32_to_u8(u8* to, const u32* from, const ssize_t max); +size_t u32_strlen(const u32* s, const ssize_t max); -- 2.45.2