~nova/MAGScrape

MAGScrape/run.sh -rw-r--r-- 461 bytes
6e300524 — Novalinium Magnus Archives Statement Scraper 1 year, 3 months ago
                                                                                
1
2
3
4
5
6
7
8
#!/bin/sh
# Scrape Magnus Archives episode transcripts and extract the statements.
#
# Steps (all files are read from / written to the current directory):
#   1. Download episode pages 001..MAXEPISODE.
#   2. Render every *.html page to plain text as <page>.html.txt.
#   3. Collect the lines matching "Statement [obe]" into segments.txt.
#   4. Open segments.txt in an editor so it can be corrected by hand.
#   5. Feed segments.txt to cut.pl and run the shell commands it prints.
#   6. Concatenate the *statement files into statements.txt.
#
# Needs: seq, perl, xargs (with -P), curl, find, elinks, rg, sed, tee,
# and cut.pl in the current directory.

# Number of the last episode to download.
MAXEPISODE=160

# 1. Build one URL per episode (zero-padded to three digits) and fetch them
#    with up to 8 curl processes in parallel; -O keeps the remote file name.
seq 1 "$MAXEPISODE" \
  | perl -ne 'printf "https://snarp.github.io/magnus_archives_transcripts/episode/%03d.html\n", $_' \
  | xargs -I {} -P 8 curl -O {}

# 2. Dump each page as text. File names are handed to the inner shell as
#    positional parameters instead of being spliced into the command string,
#    so unusual names cannot be interpreted as shell code.
find . -name '*.html' -exec sh -c '
  for page in "$@"; do
    elinks -dump-width 10000 -dump "$page" | tee "$page.txt"
  done
' sh {} +

# 3. rg -p forces headings, line numbers and colour even though the output is
#    piped; sed then strips the colour escape sequences again.
rg -pi 'Statement [obe]' [0-9]*.txt \
  | sed -r "s/\x1B\[([0-9]{1,3}(;[0-9]{1,2})?)?[mGK]//g" \
  | tee segments.txt

# 4. Manual review of the segment list before it is consumed below.
#    (The original opened statements.txt here, but that file is overwritten
#    unconditionally in step 6, so edits to it would have been lost.)
#    Left unquoted on purpose so that EDITOR may carry arguments; falls back
#    to vi when EDITOR is unset or empty.
# shellcheck disable=SC2086
${EDITOR:-vi} segments.txt

# 5. cut.pl turns the segment list into shell commands, which are executed.
#    Presumably these create the *statement files read in step 6 -- cut.pl is
#    not part of this file, so verify there.
perl cut.pl < segments.txt | sh

# 6. Join all per-episode statement files into a single output file.
#    The ./ prefix keeps a name starting with "-" from being read as an option.
cat ./*statement > statements.txt