~nabijaczleweli/voreutils

f365dabb5a5232053fc5306ab15ca180b33da7ec — наб 15 days ago 81d278e
Add pathchk
7 files changed, 344 insertions(+), 5 deletions(-)

M README.md
A cmd/pathchk.cpp
M man/fold.1
A man/pathchk.1
M tests/link
A tests/pathchk
M tests/test/test
M README.md => README.md +5 -2
@@ 69,7 69,7 @@ GNU coreutils provide the following 105 binaries, according to `dpkg -L coreutil
  * ☐ /usr/bin/numfmt
  * ☐ /usr/bin/od
  * ☑ /usr/bin/paste
  * ☐ /usr/bin/pathchk
  * ☑ /usr/bin/pathchk
  * ☐ /usr/bin/pinky
  * ☐ /usr/bin/pr
  * ☑ /usr/bin/printenv


@@ 153,9 153,12 @@ delim %%
.EN
```
(or whichever delimiter is best) after `.Sh DESCRIPTION` and disable it at the end.
If typesetting something that doesn't work in nroff mode (like the big equations in `base64.1`) provide an `.if n`/`.el` alternative in `.Fn`-like syntax;

If typesetting something that doesn't work in nroff mode (like the big equations in `base64.1`) provide an `.ie n`/`.el` alternative in `.Fn`-like syntax;
otherwise (like the polynomial in `cksum.1`) enable eqn(1) preprocessing in man(1) by starting with `'\" e`.

If typesetting something that doesn't work in troff mode, prefer `.ie t` (cf. `pathchk.1`).

In mandoc [delimited eqn(1) breaks conditionals](https://inbox.vuxu.org/mandoc-tech/20210908132448.xknl7noihywihkdh@tarta.nabijaczleweli.xyz/T/),
wrap them in braces (`.el \{ [text] % eqn % [text] \}`).


A cmd/pathchk.cpp => cmd/pathchk.cpp +118 -0
@@ 0,0 1,118 @@
// SPDX-License-Identifier: 0BSD


#include <algorithm>
#include <errno.h>
#include <limits.h>
#include <string>
#include <string_view>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <vore-getopt>
#include <vore-optarg>
#include <vore-stdio>
#include <vore-token>


using namespace std::literals;

/// printf()-style argument pack (format string, then the program name) for the usage diagnostic.
/// The format is newline-terminated so the message ends its own line on the terminal.
#define USAGE(self) "usage: %s [-Pp] path...\n", self


/// Which system's limits the operands are validated against.
/// The underlying bool doubles as the index into path_max: current -> 0, posix -> 1.
enum class system_t : bool { current, posix };
/// Maximum whole-path length per system_t, indexed by static_cast<bool>(system_t).
static const constexpr std::size_t path_max[] = {PATH_MAX, _POSIX_PATH_MAX};

/// POSIX.1-200x/D2, 3.254 Portable Filename Character Set; sorted for the std::binary_search()
static const constexpr std::string_view portable_filename_character_set = "-."
                                                                          "0123456789"
                                                                          "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
                                                                          "_"
                                                                          "abcdefghijklmnopqrstuvwxyz"sv;


int main(int argc, char * const * argv) {
	auto system = system_t::current;
	bool forbid_option_like{};
	for(auto && [arg, val] : vore::opt::get{argc, argv, "+pP", {{"portability", no_argument, nullptr, 'Q'}}})
		switch(arg) {
			case 'p':
				system = system_t::posix;
				break;
			case 'P':
				forbid_option_like = true;
				break;

			case 'Q':  // --portability
				system             = system_t::posix;
				forbid_option_like = true;
				break;

			default:
				return std::fprintf(stderr, USAGE(argv[0])), 1;
		}
	if(!*(argv + optind))
		return std::fprintf(stderr, USAGE(argv[0])), 1;


	bool err{};
	std::string buf;
	for(std::string_view path : vore::opt::args{argv + optind}) {
		if(forbid_option_like && path.empty())
			std::fprintf(stderr, "%s: %s: empty\n", argv[0], path.data()), err = true;

		if(path.size() > path_max[static_cast<bool>(system)])
			std::fprintf(stderr, "%s: %s: too long (%zu > %zu)\n", argv[0], path.data(), path.size(), path_max[static_cast<bool>(system)]), err = true;


		bool whole_ok{};
		switch(system) {
			case system_t::current: {
				struct stat sb;
				if(!lstat(path.data(), &sb))
					whole_ok = true;
				else if(errno != ENOENT)
					std::fprintf(stderr, "%s: %s: %s\n", argv[0], path.data(), std::strerror(errno)), err = true;  // this works for both ENAMETOOLONG and EINVAL
			} break;

			case system_t::posix:
				break;
		}

		for(auto name : vore::soft_tokenise<true>{path.data(), "/"}) {
			if(forbid_option_like && name[0] == '-')
				std::fprintf(stderr, "%s: %s: %.*s: starts with -\n", argv[0], path.data(), (int)name.size(), name.data()), err = true;

			switch(system) {
				case system_t::current:
					if(!whole_ok) {
						auto maxnamelen =
						    pathconf(name.data() == path.data() ? "." : (buf = {path.data(), static_cast<std::size_t>(name.data() - path.data())}).c_str(), _PC_NAME_MAX);
						if(maxnamelen == -1 && errno == ENOENT)
							maxnamelen = _POSIX_NAME_MAX;
						else if(maxnamelen == -1)  // this works for both ENAMETOOLONG and EINVAL
							std::fprintf(stderr, "%s: %s: %s: %s\n", argv[0], path.data(), buf.c_str(), std::strerror(errno)), err = true;
						if(name.size() > static_cast<std::size_t>(maxnamelen))  // -1 overflow into max for errors
							std::fprintf(stderr, "%s: %s: %.*s: too long (%zu > %zu)\n", argv[0], path.data(), (int)name.size(), name.data(), name.size(),
							             static_cast<std::size_t>(maxnamelen)),
							    err = true;
					}
					break;

				case system_t::posix:
					if(name.size() > _POSIX_NAME_MAX)
						std::fprintf(stderr, "%s: %s: %.*s: too long (%zu > %zu)\n", argv[0], path.data(), (int)name.size(), name.data(), name.size(),
						             static_cast<std::size_t>(_POSIX_NAME_MAX)),
						    err = true;

					buf.clear();
					std::copy_if(std::begin(name), std::end(name), std::back_inserter(buf),
					             [](auto c) { return !std::binary_search(std::begin(portable_filename_character_set), std::end(portable_filename_character_set), c); });
					if(!buf.empty())
						std::fprintf(stderr, "%s: %s: %.*s: non-portable characters: %s\n", argv[0], path.data(), (int)name.size(), name.data(), buf.c_str()), err = true;
					break;
			}
		}
	}
	return err;
}

M man/fold.1 => man/fold.1 +1 -1
@@ 122,6 122,6 @@ copies it as an installed command with an update to also handle
.Fl w .
.Pp
.St -p1003.2-92
creates
specifies locale behaviour and creates
.Fl sb ,
as present-day.

A man/pathchk.1 => man/pathchk.1 +168 -0
@@ 0,0 1,168 @@
.\" SPDX-License-Identifier: 0BSD
.\"
.Dd
.Dt PATHCHK 1
.Os
.
.Sh NAME
.Nm pathchk
.Nd validate filenames
.Sh SYNOPSIS
.Nm
.Op Fl \&Pp
.Ar path Ns …
.
.Sh DESCRIPTION
Checks for validity and/or portability of
.Ar path Ns s :
.Bl -bullet -compact -offset 'ab'
.It
total length,
.It
length of individual components,
.It
character set, and
.It
searchability.
.El
Failed paths and components are listed in the standard error stream alongside the failed predicate.
.Pp
With
.Fl p ,
each
.Ar path
is considered as if on an abstract base-line
.Tn POSIX
system:
.Bl -bullet -compact -offset 'ab'
.It
maximum length of
.Dv _POSIX_PATH_MAX
.Pq Sy 256 ,
.It
maximum component length of
.Dv _POSIX_NAME_MAX
.Pq Sy 14 ,
.It
the portable filename character set:
.Li [A-Za-z0-9._-]+ .
.El
.Pp
Otherwise, it resides on the current system:
.Bl -bullet -compact -offset 'ab'
.It
maximum length of
.Dv PATH_MAX
.Pq most likely Sy 4096 ,
.It
maximum component length of
.Sy pathconf Ns Pq Dv _PC_NAME_MAX
for the parent directory, if it exists, or
.Dv _POSIX_NAME_MAX ,
.It
alphabet verified indirectly by
.Xr lstat 2 ,
which should return
.Er EINVAL ,
.It
searchability verified by
.Xr lstat 2 ,
which returns
.Er EACCES
if any component was unsearchable.
.El
.
.Sh OPTIONS
.Bl -tag -compact -width "--portability"
.It Fl P
Additionally reject empty
.Ar path Ns s
and components starting with
.Qo - Qc Ns s .
.It Fl p
Check for the
.Tn POSIX
system (see above).
.It Fl -portability
.Fl \&Pp
.El
.
.Sh EXIT STATUS
.Sy 1
if any
.Ar path
failed any check.
.
.Sh EXAMPLES
Check whether a directory tree can be safely transferred to any conformant system:
.Bd -literal -compact
.Li $ Nm find Li \&. -exec Nm pathchk Fl \&Pp Li {} +
.ie t \{ .
# Broken and reduced to fit the page
pathchk: ./POSIX/POSIX.1-202x_d1.pdf: POSIX.1-202x_d1.pdf: too long (19 > 14)
pathchk: ./POSIX/IEEE P1003.2 Draft 11.2−September 1991.txt: …:
         too long (44 > 14)
pathchk: ./POSIX/IEEE P1003.2 Draft 11.2−September 1991.txt: …:
         non-portable characters:    − \&
pathchk: ./BSD/UNIX_Users_Manual_Release_3_Jun80.pdf: …: too long (37 > 14)
pathchk: ./BSD/PWB UNIX User's Manual, Edition 1.0.pdf: …: too long (39 > 14)
pathchk: ./BSD/PWB UNIX User's Manual, Edition 1.0.pdf: …:
         non-portable characters:   ' ,  \&
pathchk: ./Документация по ДЕМОС 2.10.tbz2: …: too long (50 > 14)
pathchk: ./Документация по ДЕМОС 2.10.tbz2: …:
         non-portable characters: Документация по ДЕМОС
pathchk: ./fips180-2withchangenotice.pdf: …: too long (29 > 14)
. \}
.el \{ .
pathchk: ./POSIX/POSIX.1-202x_d1.pdf: POSIX.1-202x_d1.pdf: too long (19 > 14)
pathchk: ./POSIX/IEEE P1003.2 Draft 11.2−September 1991.txt: IEEE P1003.2 Draft 11.2−September 1991.txt: too long (44 > 14)
pathchk: ./POSIX/IEEE P1003.2 Draft 11.2−September 1991.txt: IEEE P1003.2 Draft 11.2−September 1991.txt: non-portable characters:    − \&
pathchk: ./BSD/UNIX_Users_Manual_Release_3_Jun80.pdf: UNIX_Users_Manual_Release_3_Jun80.pdf: too long (37 > 14)
pathchk: ./BSD/PWB UNIX User's Manual, Edition 1.0.pdf: PWB UNIX User's Manual, Edition 1.0.pdf: too long (39 > 14)
pathchk: ./BSD/PWB UNIX User's Manual, Edition 1.0.pdf: PWB UNIX User's Manual, Edition 1.0.pdf: non-portable characters:   ' ,  \&
pathchk: ./Документация по ДЕМОС 2.10.tbz2: Документация по ДЕМОС 2.10.tbz2: too long (50 > 14)
pathchk: ./Документация по ДЕМОС 2.10.tbz2: Документация по ДЕМОС 2.10.tbz2: non-portable characters: Документация по ДЕМОС
pathchk: ./fips180-2withchangenotice.pdf: fips180-2withchangenotice.pdf: too long (29 > 14)
. \}
.Ed
.
.Sh SEE ALSO
.Xr pathconf 3
.
.Sh STANDARDS
Conforms to
.St -p1003.1-2008 ;
.Fl -portability
is an extension, originating from the GNU system.
.
.Sh HISTORY
Created in
.St -p1003.2-92
with only
.Fl p ,
to, alongside
.Xr sh 1
noclobber
.Pq Nm set Fl C ,
replace the need for the proposed
.Xr mktemp 1 ,
.Nm validfnam ,
and similar utilities, quoting pairing
.Nm
with a loop over
.Qq Ev $TMPDIR Ns Pa /application_abbreviation Ns Ev $$ Ns Pa .suffix
as sufficient, instead.
.Pp
.St -p1003.1-2008
adds
.Fl P ,
noting that the
.Fl \&Pp
needs to be used to ensure "full filename portability" across all systems, with the split being retained for compatibility.
.
.Sh BUGS
.Dv PATH_MAX ,
well, isn't \(em it's defined to
.Sy 4096
on all modern systems for compatibility, but longer paths are, usually, perfectly legal.
Keep this in mind.

M tests/link => tests/link +1 -1
@@ 1,7 1,7 @@
#!/bin/sh
# SPDX-License-Identifier: 0BSD

tmpdir="$(mktemp -dt "truncate.XXXXXXXXXX")/"
tmpdir="$(mktemp -dt "link.XXXXXXXXXX")/"
link="${CMDDIR}link"

stat -c"%i" /dev/null > /dev/null 2>&1 && sc='-c' || sc='-f'

A tests/pathchk => tests/pathchk +50 -0
@@ 0,0 1,50 @@
#!/bin/sh
# SPDX-License-Identifier: 0BSD

# Scratch directory (the trailing slash is part of the value, so names are appended directly) and the binary under test.
# NOTE(review): CMDDIR and file descriptor 3 are presumably provided by the test harness — confirm.
tmpdir="$(mktemp -dt "pathchk.XXXXXXXXXX")/"
pathchk="${CMDDIR}pathchk"
# Relative operands below are resolved against the scratch directory; give up if it cannot be entered.
cd "${tmpdir}" 2>&3 || exit

# Operands that must be accepted without -P, but rejected with a non-empty diagnostic once -P is given;
# tried both against the current system and with -p. Unexpected outcomes are reported on fd 3.
bigp() {
	for mode in '' -p; do
		# $mode is left unquoted on purpose: the empty alternative must vanish rather than become an operand
		if ! "$pathchk" $mode "$@" 2>&3; then
			echo "pathchk: $mode    $* failed?" >&3
		fi

		if "$pathchk" $mode -P "$@" 2>"${tmpdir}err"; then
			echo "pathchk: $mode -P $* okay?" >&3
		fi
		if ! [ -s "${tmpdir}err" ]; then
			echo "pathchk: empty stderr for $mode -P $*" >&3
		fi
	done
}

# Rejected only under -P: an empty operand, an operand that itself looks like an option, and an option-like final component.
bigp ''
bigp -- '-p'
bigp "/tmp/-p"

# Operands that must be accepted under -p, with and without -P; a failure is reported on fd 3.
smallp_ok() {
	for strict in '' -P; do
		# $strict is left unquoted on purpose: the empty alternative must vanish rather than become an operand
		if ! "$pathchk" $strict -p "$@" 2>&3; then
			echo "pathchk: $strict -p $* failed?" >&3
		fi
	done
}

# Operands that must be rejected under -p, with and without -P, and must produce a diagnostic;
# an unexpected success or a silent failure is reported on fd 3.
smallp_bad() {
	for strict in '' -P; do
		# $strict is left unquoted on purpose: the empty alternative must vanish rather than become an operand
		if "$pathchk" $strict -p "$@" 2>"${tmpdir}err"; then
			echo "pathchk: $strict -p $* okay?" >&3
		fi
		if ! [ -s "${tmpdir}err" ]; then
			echo "pathchk: empty stderr for $strict -p $*" >&3
		fi
	done
}

# Every component here is exactly 14 bytes long and uses portable characters only.
smallp_ok "abcdefghijklmn" "/opqrstuvwxyz01/23456789101112" "opqrstuvwxyz01/23456789101112" "abcdefghijklmn/opqrstuvwxyz01/23456789101112"
# Short enough, but contains a character outside the portable filename character set.
smallp_bad "ąbcdefgh"
# Same shapes as the accepted operands, with one extra byte making each component 15 bytes long.
smallp_bad "abcdefghijklmnA" "/opqrstuvwxyz01/23456789101112A" "opqrstuvwxyz01A/23456789101112A" "abcdefghijklmnA/opqrstuvwxyz01A/23456789101112A"


# Create the tree for real and make its middle directory unsearchable:
# the -p checks are expected to pass regardless of what exists on disk.
mkdir -p "abcdefghijklmn/opqrstuvwxyz01/23456789101112"
chmod -x "abcdefghijklmn/opqrstuvwxyz01"
smallp_ok "abcdefghijklmn" "/opqrstuvwxyz01/23456789101112" "opqrstuvwxyz01/23456789101112" "abcdefghijklmn/opqrstuvwxyz01/23456789101112"

# Against the current system, with and without -P:
# operands that exist, or that are merely absent, must be accepted,
# while the operand that passes through the unsearchable directory must be rejected with a diagnostic.
for P in '' -P; do
	# $P is left unquoted on purpose: the empty alternative must vanish rather than become an operand
	"$pathchk" $P "abcdefghijklmn" "/opqrstuvwxyz01/23456789101112" "opqrstuvwxyz01/23456789101112" 2>&3 || echo "pathchk: $P abcdefghijklmn /opqrstuvwxyz01/23456789101112 opqrstuvwxyz01/23456789101112 failed?" >&3

	"$pathchk" $P "abcdefghijklmn/opqrstuvwxyz01/23456789101112" 2>"${tmpdir}err" && echo "pathchk: $P abcdefghijklmn/opqrstuvwxyz01/23456789101112 okay?" >&3
	[ -s "${tmpdir}err" ]                                                         || echo "pathchk: empty stderr for $P abcdefghijklmn/opqrstuvwxyz01/23456789101112" >&3
done

# Restore permissions before removal: the directory made unsearchable above could otherwise keep rm from clearing its contents.
chmod -R 777 "$tmpdir"
rm -rf "$tmpdir" 2>&3

M tests/test/test => tests/test/test +1 -1
@@ 76,7 76,7 @@ for t in [ test; do
  "$tst" -e "${tmpdir}[" $bkt || echo "test: $t -e [         $bkt failed?" >&3
  "$tst" -e "/ENOENT"    $bkt && echo "test: $t -e /ENOENT   $bkt okay?" >&3

  printf ''     > "${tmpdir}empty"
                > "${tmpdir}empty"
  echo nonempty > "${tmpdir}nonempty"
  "$tst" -s /                   $bkt || echo "test: $t -s /         $bkt failed?" >&3
  "$tst" -s /dev/null           $bkt && echo "test: $t -s /dev/null $bkt okay?" >&3