~radiocane/bollux

0dfab17325dbaeb67c3b024cabba97c91908e6c6 — White_Rabbit 6 months ago 44ad6c0
Many fixes and improvements

Fix HOMEPAGE_URL when the protocol is missing
Use session TEMP_DATA for responses
Move title heuristic, history_append to display
Remove fake file metadata
Remove redownload
1 file changed, 64 insertions(+), 75 deletions(-)

M bollux
M bollux => bollux +64 -75
@@ 79,15 79,17 @@ bollux() {
	run bollux_args "$@" # and argument parsing
	run bollux_init

	# If the user hasn't configured a home page, $BOLLUX_URL will be blank.
	# If the user hasn't configured a home page, and didn't provide it as
	# command line argument, $BOLLUX_URL will be blank.
	# So, prompt the user where to go.
	if [[ ! "${BOLLUX_URL:+x}" ]]; then
		run handle_keypress 349
	else
		HOMEPAGE_URL="$(run uwellform "$BOLLUX_URL")"
		BOLLUX_URL="$HOMEPAGE_URL"
		log d "BOLLUX_URL='$BOLLUX_URL'"
		HOMEPAGE_URL="$BOLLUX_URL"

		run blastoff -u "$BOLLUX_URL" # Visit the specified URL.
		run blastoff "$BOLLUX_URL" # Visit the specified URL.
	fi
}



@@ 215,6 217,8 @@ bollux_init() {
	# Remove $BOLLUX_LESSKEY and re-generate keybindings (to catch rebinds)
	run rm -f "$BOLLUX_LESSKEY"
	mklesskey

	TEMP_DATA="$(mktemp)"
}

# Cleanup on exit


@@ 749,7 753,6 @@ gemini_request() { # gemini_request URL
# Handle the gemini response - see [3] Section 3.
gemini_response() { # gemini_response URL
	local code meta # received on the first line of the response
	local title	# determined by a clunky heuristic, see read loop: (2*)
	local url="$1"	# the currently-visited URL.

	# Read the first line.


@@ 797,34 800,7 @@ gemini_response() { # gemini_response URL
		# requested content.
		REDIRECTS=0
		BOLLUX_URL="$url"
		# Janky heuristic to guess the title of a page.
		#
		# This while loop reads through the file looking for a line
		# starting with `#', which is a level-one heading in text/gemini
		# (see [3] Section 5).  It assumes that the first such heading is the
		# title of the page, and uses that title for the terminal title
		# and for the history.
		local pretitle
		while read -r; do
			# Since looping through the file consumes it (that is,
			# the file pointer (I think?) moves away from the
			# beginning of the file), the content we've read so far
			# must be saved in a `pretitle' variable, so it can be
			# printed later with the rest of the page.
			pretitle="$pretitle$REPLY"$'\n'
			if [[ "$REPLY" =~ ^#[[:space:]]*(.*) ]]; then
				title="${BASH_REMATCH[1]}"
				break
			fi
		done
		run history_append "$url" "${title:-}"
		# Print the pretitle and the rest of the document (`passthru' is
		# a pure-bash rewrite of `cat'), and pipe it through `display'
		# for typesetting.
		{
			printf '%s' "$pretitle"
			passthru
		} | run display "$meta" "${title:-}"
		cat | run display "$meta"
		;;
	(3*) # REDIRECT
		# Redirects are a fundamental part of any hypertext framework,


@@ 956,8 932,6 @@ gopher_response() { # gopher_response URL
	local cur_server="${BASH_REMATCH[1]}"
	local type="${BASH_REMATCH[6]:-1}"

	run history_append "$url" "" # gopher doesn't really have titles, huh

	# Gopher has a concept of 'line types', or maybe 'item types' --
	# basically, each line in a gophermap starts with a character, its type,
	# and then is followed by a series of tab-separated fields describing


@@ 971,13 945,13 @@ gopher_response() { # gopher_response URL
		# Since gopher doesn't send MIME-type information in-band, we
		# just assume it's text/plain, and try to convert it later to
		# UTF-8 with `iconv'.
		run display text/plain
		run display "text/plain"
		;;
	(1) # Item is a directory [gophermap]
		# Since I've already written all the code to typeset gemini
		# well, it's easy to convert a gophermap to text/gemini and
		# display it than to write a whole new gophermap typesetter.
		run gopher_convert | run display text/gemini
		run gopher_convert | run display "text/gemini"
		;;
	(3) # Error
		# I don't know all the gopher error cases, and the spec is


@@ 989,7 963,7 @@ gopher_response() { # gopher_response URL
		# Gopher search queries are separated from their resources by a
		# TAB.  It's wild.
		if [[ "$url" =~ $'\t' ]]; then
			run gopher_convert | run display text/gemini
			run gopher_convert | run display "text/gemini"
		else
			run prompt 'SEARCH'
			run blastoff "$url	$REPLY"


@@ 1024,19 998,14 @@ gopher_response() { # gopher_response URL
#
################################################################################
file_request() { # file_request URL
	# Write the contents of the local file named by URL to stdout.
	#
	# URL is split by `usplit' into the array `url'; element 3 is used as
	# the path to read (presumably the URL's path component -- confirm
	# against `usplit').
	#
	# maybe check for the file existence? its extension?
	local -a url
	run usplit url "$1"
	# The path is quoted so that names containing whitespace or glob
	# characters reach `cat' as one literal argument, and `--' keeps a
	# leading `-' from being parsed as an option.  With an empty path `cat'
	# now reports an error instead of silently reading stdin.
	cat -- "${url[3]}"
}

file_response() { # file_response URL
	# Hand the fetched file content over to `display'.  Nothing is read
	# here directly; whatever `display' consumes comes from this function's
	# stdin.
	#
	# NOTE(review): this listing looks like a diff with removed and added
	# lines interleaved -- the metadata setup, the `history_append' call and
	# the two-argument `display' call appear to be the old body, and the
	# bare `run display' the new one.  Confirm against the real file before
	# assuming both `display' calls run.
	#
	# maybe check for the file existence? for its extension?
	local title meta
	title=FILE
	meta=text/gemini
	run history_append "$1" "" # no title for file
	run display "$meta" "${title:-}"
	run display
}

# The original pure-bash-'cat' was changed to real 'cat'


@@ 1180,11 1149,10 @@ spartan_response() {
	case "$code" in
	(2) # SUCCESS
		BOLLUX_URL="$url"
		run history_append "$url" "" # no title for spartan
		run display $meta
		run display "$meta"
		;;
	(*) # ANYTHING ELSE
		printf '%s %s\r\n' "$code" "$meta" | run display text/plain
		printf '%s %s\r\n' "$code" "$meta" | run display "text/plain"
		;;
	esac
}


@@ 1197,34 1165,49 @@ spartan_response() {
################################################################################

# display the fetched content
display() { # display METADATA [TITLE]
	local -a less_cmd
display() { # display METADATA
	local -a meta
	local mime charset
	local title=""
	local -a less_cmd

	# split header line
	local -a hdr
	IFS=';' read -ra hdr <<<"$1"
	# title is optional but nice looking
	local title
	if (($# == 2)); then
		title="$2"
	fi

	mime="$(trim_string "${hdr[0],,}")"
	for ((i = 1; i <= "${#hdr[@]}"; i++)); do
		h="${hdr[$i]}"
	# split METADATA
	IFS=';' read -ra meta <<<"$1"
	mime="$(trim_string "${meta[0],,}")"
	for ((i = 1; i <= "${#meta[@]}"; i++)); do
		h="${meta[$i]}"
		case "$h" in
		(*charset=*) charset="${h#*=}" ;;
		esac
	done

	[[ -z "$mime" ]] && mime="text/gemini"
	[[ -z "$charset" ]] && charset="utf-8"

	log debug "mime='$mime'; charset='$charset'"

	case "$mime" in
	(text/*)
		# Janky heuristic to guess the title of a page.
		#
		# This while loop reads through the file looking for a line
		# starting with `#', which is a level-one heading in text/gemini
		# (see [3] Section 5).  It assumes that the first such heading
		# is the title of the page, and uses that title for the
		# terminal title and for the history.
		local pretitle
		while read -r; do
			# Since looping through the file consumes it (that is,
			# the file pointer (I think?) moves away from the
			# beginning of the file), the content we've read so far
			# must be saved in a `pretitle' variable, so it can be
			# printed later with the rest of the page.
			pretitle="$pretitle$REPLY"$'\n'
			if [[ "$REPLY" =~ ^#[[:space:]]*(.*) ]]; then
				title="${BASH_REMATCH[1]}"
				break
			fi
		done
		run history_append "$url" "${title:-}"

		set_title "$title${title:+ - }bollux"
		# Build the `less' command
		less_cmd=(less)


@@ 1261,14 1244,23 @@ display() { # display METADATA [TITLE]
			typeset="passthru"
		fi

		# Print the pretitle and the rest of the document (`passthru' is
		# a pure-bash rewrite of `cat'), and pipe it through `display'
		# for typesetting.
		{
			printf '%s' "$pretitle"
			passthru
		} | {
			run iconv -f "${charset^^}" -t "UTF-8" |
				run tee "$BOLLUX_PAGESRC" |
				run "$typeset" | #cat
				run "${less_cmd[@]}" && bollux_quit
		} || run handle_keypress "$?"
		;;
	(*) run download "$BOLLUX_URL" ;;
	(*)
		run history_append "$BOLLUX_URL" ""
		set_title "bollux"
		run download "$BOLLUX_URL" ;;
	esac
}



@@ 1554,7 1546,7 @@ handle_keypress() { # handle_keypress CODE
		;;
	(349)
		run prompt "(HOMEPAGE) GO"
		HOMEPAGE_URL="$REPLY"
		HOMEPAGE_URL="$(run uwellform "$REPLY")"
		run handle_keypress 249
		;;
	(50) # [ - back in the history


@@ 1581,7 1573,7 @@ handle_keypress() { # handle_keypress CODE
		run blastoff "$BOLLUX_URL"
		;;
	(55) # v - goto HOMEPAGE
		run blastoff "$HOMEPAGE_URL"
		run blastoff -u "$HOMEPAGE_URL"
		;;
	(56) # (backspace) - goto HISTORY
		run blastoff "file://$BOLLUX_HISTFILE"


@@ 1646,19 1638,14 @@ download() {
	# The binary file has been corrupted by normalize, which strips 0x0d
	# bytes. Something also drops NULL bytes. So, we'll discard this data
	cat > /dev/null
	# Now it's time to re-download the binary file
	temp_data="$(mktemp)"
	log x "Downloading: '$BOLLUX_URL' => '$temp_data'..."
	gemini_request $BOLLUX_URL | dd status=progress > $temp_data

	# Now $temp_data holds both the header and the data
	HEADER=`head -1 $temp_data`
	# $TEMP_DATA holds both the header and the data
	HEADER=`head -1 $TEMP_DATA`
	# To get the header length we use ${#HEADER} syntax, but this gives
	# a bad value because it doesn't count the last byte 0x0A.
	# We sum 2 because tail wants the first useful byte.
	let FIRST_BYTE=${#HEADER}+2
	temp_name="$(mktemp)"
	tail --bytes=+$FIRST_BYTE $temp_data > $temp_name
	tail --bytes=+$FIRST_BYTE $TEMP_DATA > $temp_name

	final_name="$BOLLUX_DOWNDIR/${BOLLUX_URL##*/}"
	if [[ -f "$final_name" ]]; then


@@ 1785,7 1772,9 @@ blastoff() { # blastoff [-u] URL
		else
			die 99 "No request handler for '${url[1]}'"
		fi
	} | run normalize | {
	} > $TEMP_DATA
	# TODO handle nonresponsive servers
	cat $TEMP_DATA | run normalize | {
		if declare -F "${url[1]}_response" >/dev/null 2>&1; then
			run "${url[1]}_response" "$url"
		else