~nova/accendo

ref: d14759f2e28df2fd6ab064b69e4a8c8ff273b324 accendo/main.sh -rw-r--r-- 1.2 KiB
d14759f2 — Novalinium Adjust regex for 2+ digit page counts 2 years ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/bin/bash
#
# Download every page of a glowfic.com post into a directory named after it.
#
# Usage: main.sh <post_id>
#
# Page 1 is fetched first; the number of pages is read from the "Last"
# pagination link found in it, and the remaining pages are then fetched
# four at a time. Each page is stored as <post_id>/<post_id>.<page>.rp.
#
# Exit status: 0 on success, 1 if a retrieval or verification step fails,
# 2 if no post id was given.

POST_ID=${1:-}
readonly PAGE_SIZE=100
readonly BASE_URL="https://www.glowfic.com/posts"

# Report the time consumed by the shell and its children on any exit path.
trap times EXIT

# Without this guard an empty id would make the bare `cd` below land in
# $HOME and the downloads would be scattered there.
if [[ -z "${POST_ID}" ]]; then
    echo "Usage: ${0##*/} <post_id>" >&2
    exit 2
fi

if ! mkdir -p -- "${POST_ID}" || ! cd -- "${POST_ID}"; then
    echo "Cannot enter directory ${POST_ID}" >&2
    exit 1
fi

printf '%s' "Retrieving page 1 ... "
# --fail (-f) turns HTTP errors into a non-zero exit status instead of
# silently saving the server's error page as if it were the post.
if curl -fso "${POST_ID}.1.rp" "${BASE_URL}/${POST_ID}?per_page=${PAGE_SIZE}" \
    && [[ -f "${POST_ID}.1.rp" ]]; then
    echo "Done!"
    printf '%s' "Generating index ... "
else
    echo "Failed!"
    exit 1
fi

# Pull the page number out of the first "last_page" line that matches the
# pagination link. The leading greedy `.+` is what anchors the capture to
# the final page number on the line, so the pattern is kept as-is. Nothing
# is printed when no line matches.
MAX_PAGE=$(grep -F -- last_page "${POST_ID}.1.rp" \
    | perl -ne 'if (/.+\D(\d+)\S+;per_page=\d+\S+Last.*/) { print $1; exit }')

# No usable pagination link was found: treat the post as a single page.
if [[ ! "${MAX_PAGE}" =~ ^[0-9]+$ ]]; then
    MAX_PAGE=1
fi

if (( 10#${MAX_PAGE} > 1 )); then
    seq 2 "${MAX_PAGE}" > "${POST_ID}.index"
    echo "Done!"
    echo "Retrieving pages ... "
    # xargs exits non-zero when any of the curl invocations fails.
    if ! xargs -a "${POST_ID}.index" -P 4 -I {} -t \
        curl -fso "${POST_ID}.{}.rp" \
        "${BASE_URL}/${POST_ID}?per_page=${PAGE_SIZE}&page={}"; then
        echo "Failed! One or more pages could not be retrieved." >&2
        exit 1
    fi
    echo "Done!"
    printf '%s' "Verifying pages ..."
    for (( page = 1; page <= 10#${MAX_PAGE}; page++ )); do
        if [[ -f "${POST_ID}.${page}.rp" ]]; then
            printf '%s' "."
        else
            echo " Failed! ${POST_ID}.${page}.rp not found,"
            exit 1
        fi
    done
    echo " Done!"
    printf '%s' "Cleaning index ... "
    rm -- "${POST_ID}.index"
    echo "Done!"
else
    echo "No pages in index! Skipping page retrievals."
fi
echo "Completed retrieval of ${POST_ID}!"