~ben/bin

95a3cb30f61e90fe00cd859f10e323bbc3e62e99 — Ben Sima 9 months ago 517bd3c
cleanup textract and rdr
2 files changed, 12 insertions(+), 12 deletions(-)

M rdr
M textract
M rdr => rdr +3 -2
@@ 2,5 2,6 @@
 #! nix-shell -i bash -p pandoc
 
 textract "$1" \
-  | pandoc -f html -t markdown \
-  | less 
+  | pandoc -f html -t markdown --strip-comments --reference-links \
+  | sed 's/^:::.*$//g' \
+  | sed 's/{.css.*}//g'

M textract => textract +9 -10
@@ 9,13 9,12 @@ import sys
 cli = argparse.ArgumentParser('read a url')
 cli.add_argument('url', type=str)
 
-if __name__ == '__main__':
-    args = cli.parse_args()
-    try:
-        resp = requests.get(args.url)
-        doc = Document(resp.text)
-        sys.stdout.write(doc.summary())
-        sys.exit(0)
-    except:
-        print("Could not fetch document.")
-        sys.exit(1)
+args = cli.parse_args()
+
+try:
+    resp = requests.get(args.url)
+    doc = Document(resp.text)
+    sys.stdout.write(doc.summary())
+except:
+    print("textract: Could not fetch document.")
+    sys.exit(1)