~nabijaczleweli/voreutils

111f8b20b2d6d0b734821c6eca5ec01aa0390834 — наб a month ago 185f8b1
Correctly take characters instead of bytes in printf 'X. Fix printf %d ' showing the environment as extra
4 files changed, 39 insertions(+), 12 deletions(-)

M README.md
M cmd/printf.cpp
M man/printf.1
M tests/printf/test
M README.md => README.md +2 -2
@@ 76,7 76,7 @@ GNU coreutils provide the following 106 binaries, according to `dpkg -L coreutil
  * ☑ /usr/bin/pinky – [#1016117: columnation broken, GECOS cut off in short mode?](//bugs.debian.org/1016117)
  * ☐ /usr/bin/pr
  * ☑ /usr/bin/printenv
  * ☑ /usr/bin/printf
  * ☑ /usr/bin/printf – [#1017110: 'X takes X to be a byte, not a character (and missing in manual)?](//bugs.debian.org/1017110)
  * ☐ /usr/bin/ptx
  * ☐ /usr/bin/realpath
  * ☑ /usr/bin/runcon – [#1013924: -c `getfscon()`s program verbatim but `execve()`s it; trojan moment?](//bugs.debian.org/1013924), cf. [BUGS](//srhtcdn.githack.com/~nabijaczleweli/voreutils/blob/man/man1/runcon.1.html#BUGS)


@@ 107,7 107,7 @@ GNU coreutils provide the following 106 binaries, according to `dpkg -L coreutil
  * ☑ /usr/bin/unlink
  * ☑ /usr/bin/users
  * ☑ /usr/bin/wc
  * ☑ /usr/bin/who – [#1016456: default isn't -s, -s doesn't force "only name, line, and time" output](//bugs.debian.org/1016456)
  * ☑ /usr/bin/who – [#1016456: default isn't -s, -s doesn't force "only name, line, and time" output](//bugs.debian.org/1016456), [#1016492: --ip/--lookup terminally broken with IPv6 entries](//bugs.debian.org/1016492)
  * ☑ /usr/bin/whoami
  * ☑ /usr/bin/yes – we don't ignore `--`, mirroring POSIX echo
  * ☑ /usr/sbin/chroot

M cmd/printf.cpp => cmd/printf.cpp +29 -8
@@ 7,6 7,7 @@
#include <clocale>
#include <cstdlib>
#include <cstring>
#include <cwchar>
#include <cwctype>
#include <iconv.h>
#include <inttypes.h>


@@ 15,6 16,7 @@
#include <optional>
#include <string>
#include <string_view>
#include <tuple>
#include <type_traits>
#include <vore-fnumeric>
#include <vore-iconv>


@@ 232,6 234,27 @@ int main(int, const char * const * argv) {
	std::string_view fmt = *argv++;

	bool err{};
	// Value of a numeric argument spelled as 'C or "C: the character that follows the leading quote.
	//
	// arg points at the quote itself. On return, end points just past whatever was consumed:
	//   * nothing follows the quote     -> returns 0,                 end = arg + 1
	//   * mbrtowc() decodes a character -> returns that character,    end = past all of its bytes
	//   * mbrtowc() can't decode one    -> returns the next byte raw, end = arg + 2
	auto apostrophe_quite = [](const char * arg, const char *& end) {
		const char * const body = arg + 1;
		const wchar_t fallback  = static_cast<std::uint8_t>(*body);  // the raw byte, never sign-extended

		if(!fallback) {  // argument is just the quote
			end = body;
			return fallback;
		}

		std::mbstate_t state{};  // fresh conversion state for every argument
		wchar_t decoded;
		const auto len = std::mbrtowc(&decoded, body, std::strlen(body), &state);

		const bool incomplete = len == static_cast<std::size_t>(-2);
		const bool invalid    = len == static_cast<std::size_t>(-1);  // EILSEQ
		const bool nul        = len == 0;                             // impossible: *body is non-NUL here
		if(incomplete || invalid || nul) {
			end = body + 1;  // consume the single byte only
			return fallback;
		}

		end = body + len;
		return decoded;
	};
	auto ingest_int = [&](auto tp) {
		if(!*argv)
			return tp;


@@ 240,10 263,9 @@ int main(int, const char * const * argv) {
		errno = 0;
		const char * end{};
		bool fatal{};
		if(arg[0] == '\'' || arg[0] == '"') {
			tp  = arg[1];
			end = arg + 2;
		} else {
		if(arg[0] == '\'' || arg[0] == '"')
			tp = apostrophe_quite(arg, end);
		else {
			fatal = true;
			if constexpr(std::is_signed_v<decltype(tp)>) {
				auto res = std::strtoll(arg, const_cast<char **>(&end), 0);


@@ 276,10 298,9 @@ int main(int, const char * const * argv) {
		long double ret;
		char * end{};
		bool fatal{};
		if(arg[0] == '\'' || arg[0] == '"') {
			ret = arg[1];
			end = const_cast<char *>(arg + 2);
		} else {
		if(arg[0] == '\'' || arg[0] == '"')
			ret = apostrophe_quite(arg, const_cast<const char *&>(end));
		else {
			fatal = true;
			switch(vore::parse_floating(arg, ret, &end)) {
				case vore::floating_error_t::none:

M man/printf.1 => man/printf.1 +3 -2
@@ 89,8 89,9 @@ Numbers can also be specified as
.Sy \&' Ns Ar C
or
.Sy \&" Ns Ar C ,
in which case they're equal to the value of the byte following
.Sy \&'" .
in which case they're equal to the value of the character in the current locale following
.Sy \&'"
.Pq the next byte if invalid, or Sy 0 No if none .
.Pp
Variable width and precision
.Pq Cm %*.*d

M tests/printf/test => tests/printf/test +5 -0
@@ 5,6 5,7 @@ tmpdir="$(mktemp -dt "truncate.XXXXXXXXXX")/"
locdir="${LOCDIR:-"$tmpdir"}"
printf="${CMDDIR}printf"
seq="${CMDDIR}seq"
wc="${CMDDIR}wc"
cd data 2>&3 || exit
[ -e "${locdir}cs_CZ.UTF-8" ] || localedef -i cs_CZ -c -f UTF-8 "${locdir}cs_CZ.UTF-8" &



@@ 49,6 50,10 @@ wait
# TODO? OpenBSD doesn't have standard locales, just mklocale ("We should switch to localedef and its file format, which is more standard.")
if [ -e "${locdir}cs_CZ.UTF-8" ]; then
	[ "$(LOCPATH="${locdir}" LC_ALL=cs_CZ.UTF-8 "$printf" '%g' 0,2 0.2 2>&3)" = "0,20,2" ] || echo "printf: locale fallback for floating-point failed?" >&3

	[ "$(LOCPATH="${locdir}" LC_ALL=cs_CZ.UTF-8 "$printf" '%x' \'🫃🏿 2>"${tmpdir}err")" = "1fac3" ] || echo "printf: 🫃🏿 wrong" >&3
	[ "$("$wc" -l < "${tmpdir}err")" = 1 ]                                                         || echo "printf: 🫃🏿 error wrong" >&3
	grep -q ' 🏿' "${tmpdir}err"                                                                    || echo "printf: 🫃🏿 error wrong" >&3
else
  echo "printf: failed to build test locales; OpenBSD?" >&2
fi