~charles/misc

bd815e1fa0fe9cb931e6a5432a94e2865c500b39 — Charles A. Daniels 2 months ago 2daf5d7
check in qtar2 experiment
A qtar2/images/actiniae.jpg => qtar2/images/actiniae.jpg +0 -0
A qtar2/images/actiniae.txt => qtar2/images/actiniae.txt +78 -0
@@ 0,0 1,78 @@
public-domain, retrieved from https://www.loc.gov/resource/ds.05318/ 2024-05-16

 Title

    Actiniae. - Seeanemonen

Summary

    Illustration showing a variety of sea anemones.

Names

    Giltsch, Adolf, 1852-1911, lithographer
    Haeckel, Ernst, 1834-1919, artist

Created / Published

    Leipzig und Wien, Verlag des Bibliographischen Instituts, 1904.

Headings

    -  Actiniidae
    -  Sea anemones--1900-1910

Headings

    Book illustrations--1900-1910.
    Photomechanical prints--Color--1900-1910.
    Scientific illustrations--1900-1910.

Genre

    Book illustrations--1900-1910
    Scientific illustrations--1900-1910
    Photomechanical prints--Color--1900-1910

Notes

    -  Title from item.
    -  Photomechanical print of lithograph by Adolf Giltsch after sketch by Ernst Haeckel.
    -  Illus. in: Kunstformen der Natur. Von Prof. Dr. Ernst Haeckel. Hundert Illustrationstafeln mit beschreibendem Text, allgemeine Erläuterung und systematische Übersicht. Leipzig und Wien : Verlag des Bibliographischen Instituts, 1904, tafel 49, Heliactis.

Medium

    1 print : photomechanical, color ; sheet 36 x 26 cm..

Call Number/Physical Location

    Illus. in QH46.H18 (Case Y) [P&P]

Repository

    Library of Congress Prints and Photographs Division Washington, D.C. 20540 USA http://hdl.loc.gov/loc.pnp/pp.print

Digital Id

    ds 05318 //hdl.loc.gov/loc.pnp/ds.05318

Library of Congress Control Number

    2014645036

Reproduction Number

    LC-DIG-ds-05318 (digital file from original item)

Rights Advisory

    No known restrictions on publication.

Online Format

    image

LCCN Permalink

    https://lccn.loc.gov/2014645036


A qtar2/images/camille.jpg => qtar2/images/camille.jpg +0 -0
A qtar2/images/camille.txt => qtar2/images/camille.txt +76 -0
@@ 0,0 1,76 @@
public-domain, retrieved from https://www.loc.gov/resource/highsm.04168/ 2024-05-16

 Title

    S.S. Hurricane Camille after Hurricane Katrina, Gulfport, Mississippi

Names

    Highsmith, Carol M., 1946-, photographer

Created / Published

    2006 April 12.

Headings

    -  United States--Mississippi--Gulfport
    -  S.S. Hurricane Camille
    -  Mississippi Coast
    -  America

Headings

    Digital photographs--Color--2000-2010.

Genre

    Digital photographs--Color--2000-2010

Notes

    -  Title, date, and subjects provided by the photographer.
    -  Credit line: Carol M. Highsmith's America, Library of Congress, Prints and Photographs Division.
    -  Gift and purchase; Carol M. Highsmith; 2009; (DLC/PP-2010:031).
    -  Forms part of: Carol M. Highsmith's America Project in the Carol M. Highsmith Archive.

Medium

    1 photograph : digital, TIFF file, color.

Call Number/Physical Location

    LC-DIG-highsm- 04168 (ONLINE) [P&P]

Source Collection

    Highsmith, Carol M., 1946- Carol M. Highsmith Archive.

Repository

    Library of Congress Prints and Photographs Division Washington, D.C. 20540 USA http://hdl.loc.gov/loc.pnp/pp.print

Digital Id

    highsm 04168 //hdl.loc.gov/loc.pnp/highsm.04168

Library of Congress Control Number

    2010630163

Reproduction Number

    LC-DIG-highsm-04168 (original digital file)

Rights Advisory

    No known restrictions on publication.

Online Format

    image

LCCN Permalink

    https://lccn.loc.gov/2010630163


A qtar2/images/figurehead.jpg => qtar2/images/figurehead.jpg +0 -0
A qtar2/images/figurehead.txt => qtar2/images/figurehead.txt +76 -0
@@ 0,0 1,76 @@
public-domain, retrieved from https://www.loc.gov/resource/highsm.12938/ 2024-05-16

 Title

    Wooden ship's figureheads are featured at the Mystic Seaport Maritime Museum in Mystic, Connecticut

Names

    Highsmith, Carol M., 1946-, photographer

Created / Published

    [between 1980 and 2006]

Headings

    -  United States--Connecticut--Mystic
    -  America
    -  Mystic Seaport

Headings

    Transparencies--color--1980-2010.

Genre

    Transparencies--Color--1980-2010

Notes

    -  Digital image produced by Carol M. Highsmith to represent her original film transparency; some details may differ between the film and the digital images.
    -  Title, date, and keywords provided by the photographer.
    -  Credit line: Photographs in the Carol M. Highsmith Archive, Library of Congress, Prints and Photographs Division.
    -  Gift and purchase; Carol M. Highsmith; 2011; (DLC/PP-2011:124).
    -  Forms part of the Selects Series in the Carol M. Highsmith Archive.

Medium

    1 transparency : color ; 4 x 5 in. or smaller.

Call Number/Physical Location

    LC-HS503- 1677 (ONLINE) [P&P]

Source Collection

    Highsmith, Carol M., 1946- Carol M. Highsmith Archive.

Repository

    Library of Congress Prints and Photographs Division Washington, D.C. 20540 USA http://hdl.loc.gov/loc.pnp/pp.print

Digital Id

    highsm 12938 //hdl.loc.gov/loc.pnp/highsm.12938

Library of Congress Control Number

    2011631132

Reproduction Number

    LC-DIG-highsm-12938 (digital file from original) LC-HS503-1677 (color film transparency)

Rights Advisory

    No known restrictions on publication.

Online Format

    image

LCCN Permalink

    https://lccn.loc.gov/2011631132


A qtar2/images/shuttle.jpg => qtar2/images/shuttle.jpg +0 -0
A qtar2/images/shuttle.txt => qtar2/images/shuttle.txt +82 -0
@@ 0,0 1,82 @@
public-domain, retrieved from https://www.loc.gov/resource/hhh.tx1106.sheet/?sp=2 2024-05-16


Title

    Space Transportation System, Lyndon B. Johnson Space Center, 2101 NASA Parkway, Houston, Harris County, TX

Names

    Historic American Engineering Record, creator
    National Aeronautics and Space Administration, Owner
    Smithsonian Institution, Owner
    Deming, Joan, historian
    Slovinac, Patricia, historian
    Archaeological Consultants, Inc., contractor
    QinetiQ North America, contractor
    Wolfe, Jeffrey, field team
    Nehr, Adam, field team
    Farrar, Tom, field team
    Behrens, Thomas M., program manager
    Wachtel, John, delineator
    Klimek, Joseph, delineator
    Pierce, Ryan, delineator
    Smart GeoMetrics, contractor
    Lowe, Jet, photographer
    Johnson Space Center, Space Shuttle Program Transition and Retirement Office, sponsor

Created / Published

    Documentation compiled after 1968

Headings

    -  national space program
    -  space exploration
    -  space flight
    -  man in space
    -  Shuttle Program
    -  rocket propulsion
    -  fuel tanks
    -  Texas--Harris County--Houston

Latitude / Longitude

    29.552348,-95.093708

Notes

    -  Significance: The Orbiter Discovery, OV-103, is considered eligible for listing in the National Register of Historic Places (NRHP) in the context of the U.S. Space Shuttle Program (1969-2011) under Criterion A in the areas of Space Exploration and Transportation and under Criterion C in the area of Engineering. Because it has achieved significance within the past fifty years, Criteria Consideration G applies. Under Criterion A, Discovery is significant as the oldest of the three extant orbiter vehicles constructed for the Space Shuttle Program (SSP), the longest running American space program to date; she was the third of five orbiters built by NASA. Unlike the Mercury, Gemini, and Apollo programs, the SSP’s emphasis was on cost effectiveness and reusability, and eventually the construction of a space station. Including her maiden voyage (launched August 30, 1984), Discovery flew to space thirty-nine times, more than any of the other four orbiters; she was also the first orbiter to fly twenty missions. She had the honor of being chosen as the Return to Flight vehicle after both the Challenger and Columbia accidents. Discovery was the first shuttle to fly with the redesigned SRBs, a result of the Challenger accident, and the first shuttle to fly with the Phase II and Block I SSME. Discovery also carried the Hubble Space Telescope to orbit and performed two of the five servicing missions to the observatory. She flew the first and last dedicated Department of Defense (DoD) missions, as well as the first unclassified defense-related mission. In addition, Discovery was vital to the construction of the International Space Station (ISS); she flew thirteen of the thirty-seven total missions flown to the station by a U.S. Space Shuttle. She was the first orbiter to dock to the ISS, and the first to perform an exchange of a resident crew. Under Criterion C, Discovery is significant as a feat of engineering. 
According to Wayne Hale, a flight director from Johnson Space Center, the Space Shuttle orbiter represents a “huge technological leap from expendable rockets and capsules to a reusable, winged, hypersonic, cargo-carrying spacecraft.” Although her base structure followed a conventional aircraft design, she used advanced materials that both minimized her weight for cargo-carrying purposes and featured low thermal expansion ratios, which provided a stable base for her Thermal Protection System (TPS) materials. The Space Shuttle orbiter also featured the first reusable TPS; all previous spaceflight vehicles had a single-use, ablative heat shield. Other notable engineering achievements of the orbiter included the first reusable orbital propulsion system, and the first two-fault-tolerant Integrated Avionics System. As Hale stated, the Space Shuttle remains “the largest, fastest, winged hypersonic aircraft in history,” having regularly flown at twenty-five times the speed of sound.
    -  Survey number: HAER TX-116

Medium

    Measured Drawing(s): 6
    Data Page(s): 729

Call Number/Physical Location

    HAER TX-116

Source Collection

    Historic American Engineering Record (Library of Congress)

Repository

    Library of Congress Prints and Photographs Division Washington, D.C. 20540 USA http://hdl.loc.gov/loc.pnp/pp.print

Control Number

    tx1106

Rights Advisory

    No known restrictions on images made by the U.S. Government; images copied from other sources may be restricted. https://www.loc.gov/rr/print/res/114_habs.html

Online Format

    image
    pdf



A qtar2/images/southstreet.jpg => qtar2/images/southstreet.jpg +0 -0
A qtar2/images/southstreet.txt => qtar2/images/southstreet.txt +86 -0
@@ 0,0 1,86 @@
public-domain, retrieved from https://www.loc.gov/resource/det.4a31822/ 2024-05-16

Title

    South Street and Brooklyn Bridge, New York City

Summary

    Photo shows the square rigger ship Antilla in foreground and the Cambuskenneth behind it. (Source: C. Seavey, 2022)

Names

    Detroit Publishing Co., publisher
    Detroit Publishing Co., copyright claimant

Created / Published

    c1901.

Headings

    -  Bridges
    -  Streets
    -  Piers & wharves
    -  Sailing ships
    -  Carts & wagons
    -  United States--New York (State)--New York

Headings

    Photochrom prints--Color.

Genre

    Photochrom prints--Color

Notes

    -  In album prepared by Detroit Photographic Co. to use as a catalog in its office.
    -  Ship in foreground: Antilia.
    -  Detroit Publishing Co., no. 53623.
    -  Gift; State Historical Society of Colorado; 1955.
    -  Exhibited as a digital copy in: "Not an Ostrich: And Other Images from America's Library" at the Annenberg Space for Photography, 2018; Detroit Publishing Co. section.

Medium

    1 photomechanical print : photochrom, color.

Call Number/Physical Location

    LOT 12006, p. 72 [P&P]

Source Collection

    Detroit Publishing Company photograph collection (Library of Congress)

Repository

    Library of Congress Prints and Photographs Division Washington, D.C. 20540 USA http://hdl.loc.gov/loc.pnp/pp.print

Digital Id

    det 4a31822 https://hdl.loc.gov/loc.pnp/det.4a31822
    cph 3g02655 https://hdl.loc.gov/loc.pnp/cph.3g02655

Library of Congress Control Number

    2016794197

Reproduction Number

    LC-DIG-det-4a31822 (digital file from original) LC-USZC4-2655 (color film copy transparency)

Rights Advisory

    No known restrictions on publication.

Online Format

    image

LCCN Permalink

    https://lccn.loc.gov/2016794197



A qtar2/make_redundant.sh => qtar2/make_redundant.sh +50 -0
@@ 0,0 1,50 @@
#!/bin/sh
#
# make_redundant.sh - turn one file into a PDF of QR codes that also carries
# par2 recovery data and a SHA-256 manifest.
#
# usage: make_redundant.sh INPUT_FILE OUTPUT_FILE
#
# The input is copied into a scratch directory, a manifest and par2 recovery
# files are generated beside it, the largest generated files are removed until
# the directory is at most MAX_RATIO times the size of the input, and whatever
# is left is passed to qtar.py.
#
# NOTE: relative INPUT_FILE / OUTPUT_FILE paths are resolved after the cd
# below, i.e. against the directory that contains this script.

set -e
set -u
set -x
cd "$(dirname "$0")"

QTAR_PATH="$(realpath ./qtar.py)"
MAX_RATIO=3

if [ $# -ne 2 ] ; then
	echo "usage: $0 INPUT_FILE OUTPUT_FILE" 1>&2
	exit 1
fi

INPUT_FILE="$1"
OUTPUT_FILE="$(realpath "$2")"
TEMP="$(mktemp -d)"

# Single quotes: $TEMP is expanded when the trap fires, so the path is never
# re-parsed as shell source.
trap 'rm -rf -- "$TEMP"' EXIT

cp "$INPUT_FILE" "$TEMP/$(basename "$INPUT_FILE")"
INPUT_FILE="$(basename "$INPUT_FILE")"
cd "$TEMP"

# The glob is expanded before manifest.txt is created, so the manifest only
# lists the input file.
sha256sum --tag * > manifest.txt

par2 create -r20 "$INPUT_FILE.par2" "$INPUT_FILE"
par2 create -r20 "manifest.txt.par2" manifest.txt

INPUT_SIZE="$(wc -c < "$INPUT_FILE")"
MAX_SIZE=$((INPUT_SIZE * MAX_RATIO))
while true ; do
	# One listing per pass, largest entry first. du separates size and path
	# with a tab, so awk splits on tabs to keep paths with spaces intact.
	LISTING="$(du -b -a --max-depth=1 | sort -n -r)"

	# The largest entry is the total reported for "." itself.
	CUR_SIZE="$(printf '%s\n' "$LISTING" | awk -F '\t' 'NR==1{print($1)}')"
	if [ "$CUR_SIZE" -le "$MAX_SIZE" ] ; then
		break
	fi
	echo "too many par2 files, deleting some"

	# Skip the input file by exact path (not by regex), then take the second
	# remaining line: the first is the "." total, the second is the largest
	# generated file.
	TARGET="$(printf '%s\n' "$LISTING" | awk -F '\t' -v keep="./$INPUT_FILE" '$2 != keep && ++n == 2 {print($2)}')"
	if [ ! -e "$TARGET" ] ; then
		echo "nonexistent target $TARGET" 1>&2
		exit 1
	fi
	rm "$TARGET"
done

ls -lah
du -h -d1 . | tail -n 3

# Use the option name qtar.py actually declares rather than relying on
# argparse accepting the unambiguous prefix "--output".
python3 "$QTAR_PATH" --output-path "$OUTPUT_FILE" *

A qtar2/qtar.py => qtar2/qtar.py +189 -0
@@ 0,0 1,189 @@
#!/usr/bin/env python3

import argparse
import base64
import io
import json
import pathlib
import gzip
import hashlib
import sys
import tempfile

# https://stackoverflow.com/a/4900031
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont

# Register the four Vera TrueType faces under short names so they can be
# selected with canvas.setFont(); draw_chunk() defaults to "Vera".
pdfmetrics.registerFont(TTFont("Vera", "Vera.ttf"))
pdfmetrics.registerFont(TTFont("VeraBd", "VeraBd.ttf"))
pdfmetrics.registerFont(TTFont("VeraIt", "VeraIt.ttf"))
pdfmetrics.registerFont(TTFont("VeraBI", "VeraBI.ttf"))

import reportlab
import reportlab.pdfgen.canvas
import reportlab.lib.pagesizes
import reportlab.lib.units
import reportlab.pdfbase.ttfonts
import reportlab.pdfbase

import segno


def strtrunc(s, maxlen=24, elipses="..."):
    """Clip ``s`` to its first ``maxlen`` characters, marking the cut.

    Strings of ``maxlen`` characters or fewer come back unchanged. A longer
    string is cut to ``maxlen`` characters and ``elipses`` is appended after
    the kept prefix, so the result is ``maxlen + len(elipses)`` long.
    """
    if len(s) <= maxlen:
        return s
    return s[:maxlen] + elipses


def encode_chunk(name, pos, content):
    """Serialise one slice of a stream as a JSON text record.

    Args:
        name: stream name stored under the "stream" key.
        pos: offset of this slice within the stream, stored under "pos".
        content: the raw bytes of the slice.

    Returns:
        A JSON string with the keys "stream", "pos", "len", "encoding",
        "content" and "sha256". "content" is the base64 text of ``content``;
        "sha256" is the base64 text of the raw SHA-256 digest of ``content``;
        "encoding" is always "none".
    """
    digest = hashlib.sha256(content).digest()
    record = {
        "stream": name,
        "pos": pos,
        "len": len(content),
        "encoding": "none",
        "content": base64.b64encode(content).decode("utf-8"),
        "sha256": base64.b64encode(digest).decode("utf-8"),
    }
    # Disabled experiment: gzip larger chunks, falling back when it does not help.
    # if len(content) > 256:  # min size to get gzip
    #     oldcontent = record["content"]
    #     record["encoding"] = "gzip"
    #     record["content"] = base64.b64encode(gzip.compress(content)).decode("utf-8")
    #     if len(oldcontent) < len(record["content"]):
    #         sys.stderr.write(f'compression made chunk at pos={pos} bigger ({len(oldcontent)}<{len(record["content"])}), reverting it\n')
    #         record["content"] = oldcontent
    #         record["encoding"] = "none"

    return json.dumps(record)


def split_stream(st, name="-", maxbytes=512):
    """Cut a readable stream into encoded chunks.

    Reads ``st`` in pieces of at most ``maxbytes`` until a read returns
    nothing, and yields one ``(encoded_chunk, pos)`` pair per piece, where
    ``encoded_chunk`` is the result of ``encode_chunk(name, pos, piece)`` and
    ``pos`` is the running total of bytes read before that piece.
    """
    offset = 0
    piece = st.read(maxbytes)
    while piece:
        yield (encode_chunk(name, offset, piece), offset)
        offset += len(piece)
        piece = st.read(maxbytes)


def draw_chunk(
    canvas,
    to_vert,
    to_horiz,
    name,
    pos,
    text,
    target_size,
    label_height=0.1 * reportlab.lib.units.inch,
    font="Vera",
    font_size=8,
):
    """Draw one QR code and its two-line caption onto ``canvas``.

    Args:
        canvas: canvas object providing drawImage/setFont/drawString.
        to_vert: vertical coordinate of the bottom of the caption area.
        to_horiz: horizontal coordinate of the left edge of the cell.
        name: stream name shown in the caption (shortened with strtrunc).
        pos: chunk position shown in the caption.
        text: payload encoded into the QR code.
        target_size: width and height the QR image is drawn at.
        label_height: space reserved below the QR image for the caption.
        font: font name passed to canvas.setFont.
        font_size: font size passed to canvas.setFont.

    The QR code is rendered to a temporary PNG which is removed as soon as
    the image has been placed.
    """
    # qr = reportlab_qrcode.QRCodeImage(text, size=target_size)
    # qr.drawOn(canvas, to_horiz, to_vert+label_height)
    qr = segno.make(text)

    # Close (and thereby delete) the scratch file deterministically instead of
    # leaving it to garbage collection. drawImage is expected to have consumed
    # the PNG by the time it returns: the previous code also let the file
    # disappear when this function returned, before the canvas was saved.
    with tempfile.NamedTemporaryFile() as qrfile:
        qr.save(qrfile.name, "png", border=4, scale=3)
        canvas.drawImage(
            qrfile.name,
            to_horiz,
            to_vert + label_height,
            width=target_size,
            height=target_size,
        )

    # Caption: stream name on the upper line, position on the lower line.
    canvas.setFont(font, font_size)
    canvas.drawString(to_horiz, to_vert + label_height / 2, f"stream={strtrunc(name)}")
    canvas.drawString(to_horiz, to_vert, f"pos={pos}")


def draw_stream(
    canvas,
    st,
    name,
    chunksize=1024,
    max_row=5,
    max_column=4,
    target_size=1.9 * reportlab.lib.units.inch,
    label_height=0.2 * reportlab.lib.units.inch,
    left_margin=0.25 * reportlab.lib.units.inch,
    bottom_margin=0.25 * reportlab.lib.units.inch,
    start_row=0,
    start_col=0,
):
    """Lay out every chunk of ``st`` as a grid of captioned QR codes.

    The stream is split into chunks of at most ``chunksize`` bytes. Cells are
    filled left to right starting at (``start_row``, ``start_col``); after
    ``max_column`` cells the next row begins, and once ``max_row`` rows are
    reached the page is finished with ``canvas.showPage()`` and placement
    restarts at the first cell.

    Returns:
        ``(row, col)`` of the next free cell, so a following stream can
        continue where this one stopped.
    """
    cell_height = target_size + label_height
    row, col = start_row, start_col
    chunk_no = 0
    chunks = split_stream(st, name=name, maxbytes=chunksize)
    for chunk_no, (chunk, pos) in enumerate(chunks, start=1):
        # sys.stderr.write(f"rendering {name}, {pos}: {chunk}\n")
        sys.stderr.write(f"rendering {name}, {pos}\n")
        draw_chunk(
            canvas,
            bottom_margin + row * cell_height,
            left_margin + col * target_size,
            name,
            pos,
            chunk,
            target_size,
            label_height,
        )
        # Advance to the next cell, wrapping to a new row and then a new page.
        col += 1
        if col >= max_column:
            col = 0
            row += 1
        if row >= max_row:
            canvas.showPage()
            row = 0
            col = 0
    sys.stderr.write(f"in stream {name} rendered {chunk_no} chunks\n")
    return row, col


def create_pdf(output_path, files, pagesize=reportlab.lib.pagesizes.letter):
    """Render every file in ``files`` into one PDF of QR codes.

    Each file is opened in binary mode and drawn as a stream whose name is
    the file's path with surrounding whitespace stripped and each of
    ``/ \\ - space tab CR LF`` replaced by an underscore. Streams share the
    page grid: each one starts in the cell after the previous stream's last
    chunk. The PDF is written to ``output_path`` at the end.
    """
    pdf = reportlab.pdfgen.canvas.Canvas(str(output_path), pagesize=pagesize)
    to_underscore = str.maketrans({ch: "_" for ch in "/\\- \t\r\n"})
    next_cell = (0, 0)

    # for imgfile in ["images/actiniae.txt", "images/camille.txt", "images/figurehead.txt", "images/southstreet.txt", "images/shuttle.jpg"]:
    for stream_path in files:
        with open(stream_path, "rb") as stream:
            stream_name = str(stream_path).strip().translate(to_underscore)
            next_cell = draw_stream(
                pdf,
                stream,
                stream_name,
                start_row=next_cell[0],
                start_col=next_cell[1],
            )

    pdf.save()


def main():
    """Command-line entry point: parse arguments and build the PDF.

    Accepts ``--output-path``/``-o`` (default ``out.pdf``) and one or more
    input files, then passes them to ``create_pdf``.
    """
    cli = argparse.ArgumentParser(
        description="tool to save and load files to pages of QR codes"
    )

    output_spec = dict(
        default="out.pdf",
        type=pathlib.Path,
        help="Path to save generated PDF to",
    )
    cli.add_argument("--output-path", "-o", **output_spec)

    inputs_spec = dict(
        type=pathlib.Path,
        nargs="+",
        help="files to encode into qtar streams",
    )
    cli.add_argument("files", **inputs_spec)

    parsed = cli.parse_args()
    create_pdf(parsed.output_path, parsed.files)


# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()

A qtar2/quntar.py => qtar2/quntar.py +131 -0
@@ 0,0 1,131 @@
#!/usr/bin/env python3

import argparse
import os
import base64
import io
import json
import pathlib
import gzip
import hashlib
import sys
import tempfile

import pypdfium2

import pyzbar.pyzbar as pyzbar


def scan_pages(path, scale=4):
    """Yield the text of every QR code found in a PDF, page by page.

    Args:
        path: PDF to open with pypdfium2.
        scale: render scale passed to each page's ``render()`` call before
            the page image is handed to pyzbar.

    Yields:
        ``(page_no, text)`` pairs, where ``page_no`` is the zero-based page
        index and ``text`` is the decoded symbol data as UTF-8 text.
    """
    pdf = pypdfium2.PdfDocument(path)
    for page_no in range(len(pdf)):
        # pagefile = tempfile.NamedTemporaryFile()
        # Honour the caller's scale; it used to be hard-coded to 4 here, which
        # silently ignored the parameter.
        page_image = pdf[page_no].render(scale=scale).to_pil()
        for qrcode in pyzbar.decode(page_image):
            yield (page_no, qrcode.data.decode("utf-8"))


def decode_chunk(out_path, chunk):
    """Parse and validate one JSON chunk record read from a QR code.

    Args:
        out_path: prefix that the sanitised stream name is appended to in
            order to form the output path.
        chunk: JSON text with the keys "stream", "pos", "len", "encoding",
            "content" and "sha256".

    Returns:
        ``(name, pos, length, stream_path, content)`` where ``name`` is the
        stream name with surrounding whitespace stripped and each of
        ``/ \\ - space tab CR LF`` replaced by "_", ``pos`` and ``length`` are
        taken from the record, ``stream_path`` is ``out_path`` followed by
        ``name``, and ``content`` is the decoded payload bytes.

    Raises:
        Exception: if a required key is missing, the encoding is unknown, the
            SHA-256 of the payload does not match, or the payload length
            differs from the recorded "len".
    """

    chunkobj = json.loads(chunk)
    # "sha256" is read below, so it is checked up front with the other keys
    # rather than surfacing later as a bare KeyError.
    for k in ["stream", "pos", "len", "encoding", "content", "sha256"]:
        if k not in chunkobj:
            raise Exception(f"malformed chunk, missing '{k}' key")

    sys.stderr.write(f"chunkobj {chunkobj}\n")

    name = chunkobj["stream"].strip()
    for c in ["/", "\\", "-", " ", "\t", "\r", "\n"]:
        name = name.replace(c, "_")

    stream_path = str(pathlib.Path(out_path)) + name

    content = base64.b64decode(chunkobj["content"])

    if chunkobj["encoding"] == "none":
        pass
    elif chunkobj["encoding"] == "gzip":
        content = gzip.decompress(content)
    else:
        raise Exception(f"unknown encoding '{chunkobj['encoding']}'")

    # The recorded digest is base64 of the raw SHA-256 bytes; compare as hex.
    checksum = base64.b64decode(chunkobj["sha256"]).hex()

    actual_checksum = hashlib.sha256(content).hexdigest()
    if actual_checksum != checksum:
        raise Exception(
            f"checksum mismatch in {name} at pos={chunkobj['pos']}, dropping chunk\n"
        )

    if len(content) != chunkobj["len"]:
        raise Exception(
            f"length mismatch in {name} at pos={chunkobj['pos']} "
            f"(expected {chunkobj['len']} bytes, got {len(content)}), dropping chunk\n"
        )

    sys.stderr.write(
        f"decoded {len(content)} bytes of content at pos={chunkobj['pos']} from stream {name} for blob {checksum}\n"
    )

    return (name, chunkobj["pos"], chunkobj["len"], stream_path, content)


def main():
    """Command-line entry point: rebuild streams from a PDF of QR codes.

    Scans ``--input-path`` (default ``in.pdf``) for QR codes, decodes each
    one as a chunk, groups the chunks by stream name, and writes every stream
    to the path formed from ``--output-path`` (default prefix ``decoded_``)
    followed by the stream name. Chunks that fail to decode are reported on
    stderr and skipped.
    """

    parser = argparse.ArgumentParser(
        description="tool to save and load files to pages of QR codes"
    )

    parser.add_argument(
        "--output-path",
        "-o",
        default="decoded_",
        type=pathlib.Path,
        help="Prefix for output streams.",
    )

    parser.add_argument(
        "--input-path",
        "-i",
        default="in.pdf",
        type=pathlib.Path,
        help="Input PDF to scan.",
    )

    args = parser.parse_args()

    failed = 0
    success = 0

    files = {}  # stream name -> list of chunk tuples
    paths = {}  # stream name -> output path

    for page_no, chunk in scan_pages(args.input_path):
        try:
            name, pos, count, stream_path, content = decode_chunk(
                args.output_path, chunk
            )
            # pos is used as a file offset below, so reject anything that
            # cannot be one while the error can still be attributed to a page.
            if not isinstance(pos, int) or pos < 0:
                raise ValueError(f"invalid chunk position {pos!r} in {name}")
        except Exception as e:
            failed += 1
            sys.stderr.write(
                f"ERROR: encountered exception {e} while processing page {page_no}\n"
            )
            continue

        files.setdefault(name, []).append((name, pos, count, stream_path, content))
        paths[name] = stream_path
        success += 1

    for name in files:
        with open(paths[name], "wb") as f:
            for _, pos, count, _, content in sorted(files[name]):
                sys.stderr.write(f"writing {count} byte chunk for {name} at {pos}\n")
                # Place each chunk at its recorded offset. Writing chunks back
                # to back would shift everything after a lost chunk; seeking
                # leaves a gap of the right size instead.
                f.seek(pos)
                f.write(content)
                if len(content) < count:
                    # Pad a short chunk with zero bytes up to its recorded length.
                    f.write(bytearray(count - len(content)))

    sys.stderr.write(f"decoded {success} total chunks\n")
    if failed:
        sys.stderr.write(f"failed to decode {failed} chunks\n")


# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()

A qtar2/scan_redundant.sh => qtar2/scan_redundant.sh +40 -0
@@ 0,0 1,40 @@
#!/bin/sh
#
# scan_redundant.sh - decode a PDF of QR codes back into files and check the
# result against any par2 recovery sets that were decoded with it.
#
# usage: scan_redundant.sh INPUT_FILE OUTPUT_PATH
#
# The PDF is copied into a scratch directory and decoded there with
# quntar.py; each main (non-volume) .par2 file is used for a verify pass, the
# par2 files are then removed and the remaining files are copied into
# OUTPUT_PATH.
#
# NOTE: relative INPUT_FILE / OUTPUT_PATH paths are resolved after the cd
# below, i.e. against the directory that contains this script.

set -e
set -u
set -x
cd "$(dirname "$0")"

QUNTAR_PATH="$(realpath ./quntar.py)"

if [ $# -ne 2 ] ; then
	echo "usage: $0 INPUT_FILE OUTPUT_PATH" 1>&2
	exit 1
fi

mkdir -p "$2"
INPUT_FILE="$1"
OUTPUT_PATH="$(realpath "$2")"
TEMP="$(mktemp -d)"

# Single quotes: $TEMP is expanded when the trap fires, so the path is never
# re-parsed as shell source.
trap 'rm -rf -- "$TEMP"' EXIT

cp "$INPUT_FILE" "$TEMP/$(basename "$INPUT_FILE")"
INPUT_FILE="$(basename "$INPUT_FILE")"
cd "$TEMP"

python3 "$QUNTAR_PATH" -i "$INPUT_FILE" -o decoded
rm "$INPUT_FILE"

# quntar.py prefixes every stream with "decoded"; strip that prefix again.
for f in decoded* ; do
	# An unmatched glob stays literal, which means nothing was decoded.
	if [ ! -e "$f" ] ; then
		echo "no streams were decoded from $INPUT_FILE" 1>&2
		exit 1
	fi
	mv "$f" "${f#decoded}"
done

# Verify each recovery set through its main .par2 file; volume files
# (*.volNNN+NN.par2) are skipped because they belong to a set already covered.
# Verification is report-only: a failure is logged and does not stop the
# script, and no repair is attempted here.
find . -type f -name "*.par2" -print | grep -E -v '[.]vol[0-9+]*[.]par2$' | while IFS= read -r f ; do
	par2 verify "$f" "$(basename "$f" .par2)"* || echo "par2 verify failed for $f" 1>&2
done

rm -f -- *.par2
cp -- * "$OUTPUT_PATH/"