~nova/accendo

accendo/accendo.sh

#!/bin/bash
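# accendo.sh: download every page of a glowfic post (given its numeric ID or URL)
# into a local directory, one .rp response file per page.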
POST_ID=$1
TARGET_DIRECTORY=$2
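# Site and pagination configuration.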
BASE_PROTO=https
BASE_HOST=www.glowfic.com
BASE_PATH=posts
PAGE_SIZE=100

# https://stackoverflow.com/questions/59895/getting-the-source-directory-of-a-bash-script-from-within#comment49538168_246128
SCRIPT_DIR=$( dirname "$(readlink -f "$0")" )

if [ -z "$POST_ID" ]; then
    echo "Usage: $0 POST_ID [TARGET_DIRECTORY]"
    echo "This script downloads glowfic by ID or URL to TARGET_DIRECTORY, or ./POST_ID if the directory is not specified."
    exit 1;
fi

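# Print accumulated user/system CPU times when the script exits.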
trap times EXIT;

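# If POST_ID is not a plain number, treat it as a post URL and extract the numeric ID from it.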
re='^[0-9]+$'
if ! [[ $POST_ID =~ $re ]] ; then
    POST_ID=$( echo $POST_ID | perl -ne '/\/(\d+)/; print $1' )
fi
if ! [[ $POST_ID =~ $re ]] ; then
    echo "Post ID ${POST_ID} not a number!"
    exit 1
fi

if [ -d "$TARGET_DIRECTORY" ]; then
    cd ${TARGET_DIRECTORY}
else
    TARGET_DIRECTORY=$PWD/$POST_ID
    mkdir -p ${TARGET_DIRECTORY}
    cd ${TARGET_DIRECTORY}
fi

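# Fetch page 1; -K reads additional curl options from the curl_parameters file next to this script.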
echo -n "Retrieving page 1 ... "
curl -K "$SCRIPT_DIR/curl_parameters" -o ${POST_ID}.1.rp "${BASE_PROTO}://${BASE_HOST}/${BASE_PATH}/${POST_ID}?per_page=${PAGE_SIZE}"
if [ -f ${POST_ID}.1.rp ]; then
    if grep -qF Glowfic ${POST_ID}.1.rp; then
        echo "Done!"
        echo -n "Generating index ... "
    else
        echo "Glowfic with post ID ${POST_ID} does not exist! See ${TARGET_DIRECTORY} for details."
        exit 1
    fi
else
    echo "Failed!"
    exit 1
fi
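# The "Last" pagination link on page 1 carries the highest page number.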
MAX_PAGE=$(grep -F last_page ${POST_ID}.1.rp | perl -pe 's/.+\D(\d+)\S+;per_page=\d+\S+Last.*/\1/; print; exit')
if (( MAX_PAGE > 1 )); then
    seq 2 ${MAX_PAGE} > ${POST_ID}.index
    echo "Done!"
    echo "Retrieving pages ... "
    xargs -a ${POST_ID}.index -P 4 -I {} -t curl -K "$SCRIPT_DIR/curl_parameters" -o ${POST_ID}.{}.rp "${BASE_PROTO}://${BASE_HOST}/${BASE_PATH}/${POST_ID}?per_page=${PAGE_SIZE}&page={}"
    echo "Done!"
    echo -n "Verifying pages ..."
    for PAGE in $(seq 1 ${MAX_PAGE}); do
        if [ -f ${POST_ID}.${PAGE}.rp ]; then
            echo -n "."
        else
            echo " Failed! ${POST_ID}.${PAGE}.rp not found,"
            exit 1
        fi
    done
    echo " Done!"
    echo -n "Cleaning index ... ";
    rm ${POST_ID}.index
    echo "Done!"
else
    echo "No pages in index! Skipping page retrievals."
fi
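# Summarize the total size of the downloaded files.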
echo -n "Downloaded size: "
du -ch . | tail -n 1 | cut -f 1
echo "Completed retrieval of ${POST_ID} to ${TARGET_DIRECTORY}!"