~nova/mag-fear-scraper

cdb6dc185c1529dfd9b028ae674a845eea28088b — Nova Devereaux 1 year, 1 month ago 718b8ed
Swap output columns
1 file changed, 1 insertion(+), 1 deletion(-)

M build.sh
M build.sh => build.sh +1 -1
@@ 3,4 3,4 @@ wget https://the-magnus-archives.fandom.com/wiki/Episodes
pup 'a[href*=":_"][href*="MAG"] attr{href}' < Episodes | sort -u | xargs -I {} wget https://the-magnus-archives.fandom.com{}
fgrep Related\ Entity MAG* -A 3 | fgrep href | perl -pe 'y/_/ /; s/-//; s/<div.*?">//; s/<\/div>//; s/<a.*?>(.*?)<\/a>[^<]*/$1, /g; s/<.*?>//g; s/(\t.*)(?:Primarily|Possibly) /$1/; s/, $//; $_ .= "\n"' | sort -k 2 -n > episode_fear_list_denormalized.tsv
perl -ne 'chomp; @f = split "\t"; ($id) = ($f[0] =~ /([0-9]*):/); $f[1] =~ s/The //g; @g = split ", ", $f[1]; print "$id\t$_\n" foreach @g' < episode_fear_list_denormalized.tsv > episode_fear_list_normalized.tsv
cut -f 2 episode_fear_list_normalized.tsv | sort | uniq -c | sort -n | sed 's/^ *//' > fear_counts.tsv
cut -f 2 episode_fear_list_normalized.tsv | sort | uniq -c | sort -n | perl -ane '@F[0] =~ s/^ *//; print "$F[1]\t$F[0]\n"' > fear_counts.tsv