~nova/accendo

ref: 1ca32f520c5b92b86df68b213e6e257f527df866 accendo/main.sh -rw-r--r-- 1.2 KiB
1ca32f52 — Novalinium Faster max_page calculation 2 years ago
                                                                                
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
#!/bin/bash
#
# Download every page of a glowfic.com post into ./<POST_ID>/.
#
# Usage: main.sh POST_ID
#
# Page N of the post is saved as <POST_ID>/<POST_ID>.N.rp, requested with
# PAGE_SIZE replies per page. Page 1 is fetched first; the line of that page
# containing "last_page" is parsed to learn how many pages exist, and the
# remaining pages are then fetched by up to four parallel curl processes.
#
# Exit status: 0 on success, 1 if a download or the final verification fails,
# 2 if POST_ID is missing.

# Without a post id, "mkdir -p" fails and a bare "cd" would move to $HOME,
# so refuse to run rather than scatter files there.
if [[ $# -lt 1 || -z $1 ]]; then
    printf 'Usage: %s POST_ID\n' "${0##*/}" >&2
    exit 2
fi

POST_ID=$1
readonly PAGE_SIZE=100
readonly BASE_URL="https://www.glowfic.com/posts/${POST_ID}?per_page=${PAGE_SIZE}"

# Report the shell's accumulated user/system CPU times on every exit path.
trap times EXIT

# All output files are written relative to the per-post directory; abort if
# it cannot be entered so nothing lands in the wrong place.
if ! mkdir -p -- "${POST_ID}" || ! cd -- "${POST_ID}"; then
    printf 'Cannot create or enter directory %s\n' "${POST_ID}" >&2
    exit 1
fi

printf 'Retrieving page 1 ... '
# -f makes curl fail on HTTP errors instead of saving the error page, and the
# exit status is checked so a stale file from an earlier run cannot pass as a
# successful download.
if curl -fso "${POST_ID}.1.rp" "${BASE_URL}" && [[ -s ${POST_ID}.1.rp ]]; then
    echo "Done!"
    printf 'Generating index ... '
else
    echo "Failed!"
    exit 1
fi

# Pull the page number out of the first "last_page" line. The leading ".*" is
# greedy, so the (?<!\d) look-behind forces the capture to begin at the start
# of the digit run; without it only the final digit is captured (12 -> 2).
# Nothing is printed when the line does not match, leaving MAX_PAGE empty.
MAX_PAGE=$(grep -F last_page "${POST_ID}.1.rp" \
    | perl -ne 'if (/.*(?<!\d)(\d+)\S+;per_page=\d+\S+Last/) { print $1; exit }')

if (( ${MAX_PAGE:-0} > 1 )); then
    # Page 1 is already on disk, so the work list starts at page 2.
    seq 2 "${MAX_PAGE}" > "${POST_ID}.index"
    echo "Done!"
    echo "Retrieving pages ... "
    # One curl per index line, at most 4 at once; -t echoes each command.
    if ! xargs -a "${POST_ID}.index" -P 4 -I {} -t \
            curl -fso "${POST_ID}.{}.rp" "${BASE_URL}&page={}"; then
        echo "Failed! At least one page could not be downloaded."
        exit 1
    fi
    echo "Done!"
    printf 'Verifying pages ...'
    for (( PAGE = 1; PAGE <= MAX_PAGE; PAGE++ )); do
        # Every page must exist and be non-empty.
        if [[ -s ${POST_ID}.${PAGE}.rp ]]; then
            printf '.'
        else
            echo "Failed! ${POST_ID}.${PAGE}.rp not found,"
            exit 1
        fi
    done
    echo "Done!"
    printf 'Cleaning index ... '
    rm -- "${POST_ID}.index"
    echo "Done!"
else
    echo "No pages in index! Skipping page retrievals."
fi
echo "Completed retrieval of ${POST_ID}!"