~acdw/bollux

1e06e8f5af834b02fdd7fc48413d3380fa93c974 — Case Duckworth 24 days ago 7dd75ca master
Change implementation of URL array
1 files changed, 182 insertions(+), 134 deletions(-)

M bollux
M bollux => bollux +182 -134
@@ 80,7 80,7 @@ bollux() {

	log d "BOLLUX_URL='$BOLLUX_URL'"

	run blastoff "$BOLLUX_URL"
	run blastoff -u "$BOLLUX_URL"
}

# process command-line arguments


@@ 142,6 142,8 @@ bollux_config() {
	: "${C_LIST:=0}"        # list formatting
	: "${C_QUOTE:=3}"       # quote formatting
	: "${C_PRE:=0}"         # preformatted text formatting
	## state
	UC_BLANK=':?:'
}

# quit happily


@@ 170,123 172,167 @@ prompt() { # prompt [-u] PROMPT [READ_ARGS...]

# load a URL
blastoff() { # blastoff [-u] URL
	local well_formed=true
	local proto url
	local u

	if [[ "$1" == "-u" ]]; then
		well_formed=false
		shift
		u="$(run uwellform "$2")"
	else
		u="$1"
	fi
	url="$1"

	if $well_formed && [[ "$1" != "$BOLLUX_URL" ]]; then
		url="$(run transform_resource "$BOLLUX_URL" "$1")"
	local -a url
	run utransform url "$BOLLUX_URL" "$u"
	if ! ucdef url[1]; then
		run ucset url[1] "$BOLLUX_PROTO"
	fi
	[[ "$url" != *://* ]] && url="$BOLLUX_PROTO://$url"
	url="$(trim_string "$url")"
	proto="${url%://*}"

	log d "PROTO='$proto' URL='$url'"

	{
		if declare -Fp "${proto}_request" &>/dev/null; then
			run "${proto}_request" "$url"
		if declare -Fp "${url[1]}_request" >/dev/null 2>&1; then
			run "${url[1]}_request" "$url"
		else
			die 99 "No request handler for '$proto'!"
			die 99 "No request handler for '${url[1]}'"
		fi
	} | run normalize |
		{
			if declare -Fp "${proto}_response" &>/dev/null; then
				run "${proto}_response" "$url"
			else
				log x "No response handler for '$proto', passing through"
				passthru
			fi
		}
	} | run normalize | {
		if declare -Fp "${url[1]}_response" >/dev/null 2>&1; then
			run "${url[1]}_response" "$url"
		else
			log d "No response handler for '${url[1]}', passing thru"
			passthru
		fi
	}
}

# URLS
## https://tools.ietf.org/html/rfc3986
# uwellform URL:STRING
# Print URL on stdout in a form that always carries a scheme: the string is
# passed through trim_string and, if the result contains no "://", it is
# prefixed with "$BOLLUX_PROTO://".
# NOTE(review): trim_string is defined outside this section; presumably it
# strips leading and trailing whitespace -- confirm.
uwellform() {
	local u

	# Trim BEFORE looking for a scheme.  Prefixing first would glue the
	# scheme onto any leading whitespace ("proto://  host"), which a later
	# trim can no longer remove.
	u="$(trim_string "$1")"

	if [[ "$u" != *://* ]]; then
		u="$BOLLUX_PROTO://$u"
	fi

	printf '%s\n' "$u"
}

# usplit NAME:ARRAY URL:STRING
# Split URL into its components and store them in the caller's indexed
# array NAME:
#   0=url 1=scheme 2=authority 3=path 4=query 5=fragment
# A scheme, authority, query or fragment that is missing or empty is stored
# as $UC_BLANK; the path is always stored as matched, even when empty.
# Element 0 is rebuilt from the other elements by ujoin.
# Returns the regex-match status if URL does not match, otherwise 0.
usplit() {
	local re='^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'
	[[ $2 =~ $re ]] || return $?

	# These are read indirectly through ${!c} in the loop below.
	# shellcheck disable=SC2034
	local scheme="${BASH_REMATCH[2]}"
	# shellcheck disable=SC2034
	local authority="${BASH_REMATCH[4]}"
	# shellcheck disable=SC2034
	local path="${BASH_REMATCH[5]}"
	# shellcheck disable=SC2034
	local query="${BASH_REMATCH[7]}"
	# shellcheck disable=SC2034
	local fragment="${BASH_REMATCH[9]}"

	local i=1 c
	for c in scheme authority path query fragment; do
		if [[ "${!c}" || "$c" == path ]]; then
			printf -v "$1[$i]" '%s' "${!c}"
		else
			# '%s' so the placeholder is data, never a printf format
			printf -v "$1[$i]" '%s' "$UC_BLANK"
		fi
		((i += 1))
	done

	# Start element 0 empty: ujoin only overwrites it when a scheme is
	# present and otherwise appends to whatever is already there.
	printf -v "$1[0]" '%s' ''

	# Call ujoin in THIS shell.  Running it inside $(...) made it update a
	# subshell's copy of the array, and its (empty) stdout was then used as
	# a printf format, leaving element 0 blank.
	ujoin "$1"

	# As before, a successful split reports success no matter what status
	# ujoin's last command left behind.
	return 0
}

# transform a URI according to RFC 3986 sec 5.2.2
transform_resource() { # transform_resource BASE_URL REFERENCE_URL
	local -A R B T # reference, base url, target
	eval "$(run parse_url B "$1")"
	eval "$(run parse_url R "$2")"
	# A non-strict parser may ignore a scheme in the reference
	# if it is identical to the base URI's scheme.
	if ! "${STRICT:-true}" && [[ "${R[scheme]}" == "${B[scheme]}" ]]; then
		unset "${R[scheme]}"
ujoin() { # ujoin NAME:ARRAY
	local -n U="$1"

	if ucdef U[1]; then
		printf -v U[0] "%s:" "${U[1]}"
	fi

	# basically pseudo-code from spec ported to bash
	if isdefined "R[scheme]"; then
		T[scheme]="${R[scheme]}"
		isdefined "R[authority]" && T[authority]="${R[authority]}"
		isdefined R[path] &&
			T[path]="$(run remove_dot_segments "${R[path]}")"
		isdefined "R[query]" && T[query]="${R[query]}"
	if ucdef U[2]; then
		printf -v U[0] "${U[0]}//%s" "${U[2]}"
	fi

	printf -v U[0] "${U[0]}%s" "${U[3]}"
	
	if ucdef U[4]; then
		printf -v U[0] "${U[0]}?%s" "${U[4]}"
	fi

	if ucdef U[5]; then
		printf -v U[0] "${U[0]}#%s" "${U[5]}"
	fi

	log d "${U[0]}"
}

# ucdef NAME
# Succeeds unless the variable or array element called NAME currently
# expands to the $UC_BLANK placeholder.
ucdef() {
	if [[ "${!1}" == "$UC_BLANK" ]]; then
		return 1
	fi
	return 0
}
# ucblank NAME
# Succeeds when the variable or array element called NAME expands to the
# empty string.
ucblank() {
	if [[ -n "${!1}" ]]; then
		return 1
	fi
	return 0
}
# ucset NAME VALUE
# Store VALUE in NAME, where NAME designates one element of a URL array
# (for example "url[2]"), then call ujoin on the array itself -- NAME with
# its "[...]" subscript removed -- so the joined URL is refreshed.
ucset() {
	# printf -v stores VALUE verbatim.  The previous eval "NAME='VALUE'"
	# broke on any VALUE containing a single quote, and such a value could
	# run arbitrary commands.
	run printf -v "$1" '%s' "$2"
	run ujoin "${1/\[*\]}"
}

utransform() { # utransform TARGET:ARRAY BASE:STRING REFERENCE:STRING
	local -a B R # base, reference
	local -n T="$1" # target
	usplit B "$2"
	usplit R "$3"

	# initialize T
	for ((i=1;i<=5;i++)); do
		T[$i]="$UC_BLANK"
	done

	# 0=url 1=scheme 2=authority 3=path 4=query 5=fragment
	if ucdef R[1]; then
		T[1]="${R[1]}"
		if ucdef R[2]; then
			T[2]="${R[2]}"
		fi
		if ucdef R[3]; then
			T[3]="$(pundot "${R[3]}")"
		fi
		if ucdef R[4]; then
			T[4]="${R[4]}"
		fi
	else
		if isdefined "R[authority]"; then
			T[authority]="${R[authority]}"
			isdefined "R[authority]" &&
				T[path]="$(remove_dot_segments "${R[path]}")"
			isdefined R[query] && T[query]="${R[query]}"
		if ucdef R[2]; then
			T[2]="${R[2]}"
			if ucdef R[2]; then
				T[3]="$(pundot "${R[3]}")"
			fi
			if ucdef R[4]; then
				T[4]="${R[4]}"
			fi
		else
			if isempty "R[path]"; then
				T[path]="${B[path]}"
				if isdefined R[query]; then
					T[query]="${R[query]}"
			if ucblank R[3]; then
				T[3]="${B[3]}"
				if ucdef R[4]; then
					T[4]="${R[4]}"
				else
					T[query]="${B[query]}"
					T[4]="${B[4]}"
				fi
			else
				if [[ "${R[path]}" == /* ]]; then
					T[path]="$(remove_dot_segments "${R[path]}")"
				if [[ "${R[3]}" == /* ]]; then
					T[3]="$(pundot "${R[3]}")"
				else
					T[path]="$(merge_paths "B[authority]" "${B[path]}" "${R[path]}")"
					T[path]="$(remove_dot_segments "${T[path]}")"
					T[3]="$(pmerge B R)"
					T[3]="$(pundot "${T[3]}")"
				fi
				if ucdef R[4]; then
					T[4]="${R[4]}"
				fi
				isdefined R[query] && T[query]="${R[query]}"
			fi
			T[authority]="${B[authority]}"
			T[2]="${B[2]}"
		fi
		T[scheme]="${B[scheme]}"
		T[1]="${B[1]}"
	fi
	isdefined R[fragment] && T[fragment]="${R[fragment]}"
	# cf. 5.3 -- recomposition
	local r
	isdefined "T[scheme]" && r="$r${T[scheme]}:"
	# remove the port from the authority
	isdefined "T[authority]" && r="$r//${T[authority]%:*}"
	r="$r${T[path]}"
	isdefined T[query] && r="$r?${T[query]}"
	isdefined T[fragment] && r="$r#${T[fragment]}"
	printf '%s\n' "$r"
}

# merge URL paths according to RFC 3986 sec 5.2.3
merge_paths() { # merge_paths BASE_AUTHORITY BASE_PATH REFERENCE_PATH
	# shellcheck disable=2034
	local B_authority="$1"
	local B_path="$2"
	local R_path="$3"
	# if R_path is empty, get rid of // in B_path
	if [[ -z "$R_path" ]]; then
		printf '%s\n' "${B_path//\/\//\//}"
		return
	if ucdef R[5]; then
		T[5]="${R[5]}"
	fi

	if isdefined "B_authority" && isempty "B_path"; then
		printf '/%s\n' "${R_path//\/\//\//}"
	else
		if [[ "$B_path" == */* ]]; then
			B_path="${B_path%/*}/"
		else
			B_path=""
		fi
		printf '%s/%s\n' "${B_path%/}" "${R_path#/}"
	fi
	ujoin T
}

# remove dot segments in paths according to RFC 3986 sec 5.2.4
remove_dot_segments() { # remove_dot_segments PATH
pundot() { # pundot PATH:STRING
	local input="$1"
	local output
	while [[ "$input" ]]; do


@@ 301,7 347,7 @@ remove_dot_segments() { # remove_dot_segments PATH
		elif [[ "$input" == . || "$input" == .. ]]; then
			input=
		else
			[[ $input =~ ^(/?[^/]*)(/?.*)$ ]] || log debug NOMATCH
			[[ $input =~ ^(/?[^/]*)(/?.*)$ ]] || return 1
			output="$output${BASH_REMATCH[1]}"
			input="${BASH_REMATCH[2]}"
		fi


@@ 309,36 355,28 @@ remove_dot_segments() { # remove_dot_segments PATH
	printf '%s\n' "${output//\/\//\//}"
}

# parse a url using the reference regex in RFC 3986 appendix B
parse_url() { # eval "$(split_url NAME STRING)" => NAME[...]
	local name="$1"
	local string="$2"
	local re='^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'
	[[ $string =~ $re ]] || return $?
pmerge() {
	local -n b="$1"
	local -n r="$2"

	local scheme="${BASH_REMATCH[2]}"
	local authority="${BASH_REMATCH[4]}"
	local path="${BASH_REMATCH[5]}"
	local query="${BASH_REMATCH[7]}"
	local fragment="${BASH_REMATCH[9]}"
	if ucblank r[3]; then
		printf '%s\n' "${b[3]//\/\//\//}"
		return
	fi

	for c in scheme authority query fragment; do
		[[ "${!c}" ]] &&
			run printf '%s[%s]=%q\n' "$name" "$c" "${!c}"
	done
	# unclear if the path is always set even if empty but it looks that way
	run printf '%s[path]=%q\n' "$name" "$path"
	if ucdef b[2] && ucblank b[3]; then
		printf '/%s\n' "${r[3]//\/\//\//}"
	else
		local bp=""
		if [[ "${b[3]}" == */* ]]; then
			bp="${b[3]%/*}"
		fi
		printf '%s/%s\n' "${bp%/}" "${r[3]#/}"
	fi
}

# isdefined NAME
# Succeeds when the variable called NAME is set, even if it is set to the
# empty string.
isdefined() {
	if [[ -n "${!1+x}" ]]; then
		return 0
	fi
	return 1
}

# isempty NAME
# Succeeds only when the variable called NAME is set AND holds the empty
# string; an unset variable does not count as empty.
isempty() {
	if [[ -z "${!1-x}" ]]; then
		return 0
	fi
	return 1
}

# work with URLs
# https://github.com/dylanaraps/pure-bash-bible/
urlencode() { # urlencode STRING
uencode() { # uencode URL:STRING
	local LC_ALL=C
	for ((i = 0; i < ${#1}; i++)); do
		: "${1:i:1}"


@@ 355,7 393,7 @@ urlencode() { # urlencode STRING
}

# https://github.com/dylanaraps/pure-bash-bible/
urldecode() { # urldecode STRING
udecode() { # udecode URL:STRING 
	: "${1//+/ }"
	printf '%b\n' "${_//%/\\x}"
}


@@ 363,19 401,28 @@ urldecode() { # urldecode STRING
# GEMINI
# https://gemini.circumlunar.space/docs/specification.html
gemini_request() { # gemini_request URL
	local url port server
	local ssl_cmd
	url="$1"
	port=1965
	server="${url#*://}"
	server="${server%%/*}"
	local -a url
	usplit url "$1"

	# get rid of userinfo
	ucset url[2] "${url[2]#*@}"

	local port
	if [[ "${url[2]}" == *:* ]]; then
		port="${url[2]#*:}"
		ucset url[2] "${url[2]%:*}"
	else
		port=1965 # TODO variablize
	fi

	ssl_cmd=(openssl s_client -crlf -quiet -connect "$server:$port")
	ssl_cmd+=(-servername "$server") # SNI
	# disable old TLS/SSL versions
	ssl_cmd+=(-no_ssl3 -no_tls1 -no_tls1_1)
	local ssl_cmd=(
		openssl s_client 
			-crlf -quiet -connect "${url[2]}:$port"
			-servername "${url[2]}" # SNI
			-no_ssl3 -no_tls1 -no_tls1_1 # disable old TLS/SSL versions
	)

	run "${ssl_cmd[@]}" <<<"$url" 2>/dev/null
	run "${ssl_cmd[@]}" <<<"$url"
}

gemini_response() { # gemini_response URL


@@ 399,7 446,7 @@ gemini_response() { # gemini_response URL
		10) run prompt "$meta" ;;
		11) run prompt "$meta" -s ;; # password input
		esac
		run blastoff "?$(urlencode "$REPLY")"
		run blastoff "?$(uencode "$REPLY")"
		;;
	2*) # OK
		REDIRECTS=0


@@ 480,7 527,7 @@ gopher_response() { # gopher_response URL
	cur_server="${BASH_REMATCH[1]}"
	type="${BASH_REMATCH[6]:-1}"

	run history_append "$url" "" # TODO: get the title ??
	run history_append "$url" "" # gopher doesn't really have titles, huh

	log d "TYPE='$type'"



@@ 618,9 665,10 @@ display() { # display METADATA [TITLE]
		set_title "$title${title:+ - }bollux"
		less_cmd=(less -R) # render ANSI color escapes
		mklesskey "$BOLLUX_LESSKEY" && less_cmd+=(-k "$BOLLUX_LESSKEY")
		local helpline="o:open, g/G:goto, [:back, ]:forward, r:refresh"
		less_cmd+=(
			-Pm"$(less_prompt_escape "$BOLLUX_URL") - bollux$" # 'status'line
			-P='o\:open, g\:goto, [\:back, ]\:forward, r\:refresh$' # helpline
			-P="$(less_prompt_escape "$helpline")$" # helpline
			-m # start with statusline
			+k # float content to the top
		)


@@ 910,7 958,7 @@ handle_keypress() { # handle_keypress CODE
		run blastoff "$BOLLUX_URL"
		;;
	53) # G - goto a url (pre-filled with current)
		prompt -u GO
		run prompt -u GO
		run blastoff -u "$REPLY"
		;;
	*) # 54-57 -- still available for binding