~nova/accendo

5a4ec67e142d56ac20f3886b833ad5b287baf2c9 — Novalinium 4 years ago 3786a41 master
Better options for configuration, refactored conf to top of file, more error checking, README
2 files changed, 70 insertions(+), 7 deletions(-)

A README
R main.sh => accendo.sh
A README => README +28 -0
@@ 0,0 1,28 @@
Usage: accendo.sh POST_ID [TARGET_DIRECTORY]
This script downloads a glowfic post, identified by its numeric ID or by its URL, into TARGET_DIRECTORY, or into ./POST_ID if no directory is specified.

Example:
```
[~nova] $ which accendo.sh
/usr/local/accendo/accendo.sh
[~nova] $ pwd
/home/nova
[~nova] $ sh accendo.sh 1685
Retrieving page 1 ... Done!
Generating index ... Done!
Retrieving pages ... 
curl -K /usr/local/accendo/curl_parameters -o 1685.2.rp https://www.glowfic.com/posts/1685?per_page=100&page=2 
curl -K /usr/local/accendo/curl_parameters -o 1685.3.rp https://www.glowfic.com/posts/1685?per_page=100&page=3 
Done!
Verifying pages ...... Done!
Cleaning index ... Done!
Downloaded size: 488K
Completed retrieval of 1685 to /home/nova/1685!
0m0.008s 0m0.008s
0m0.112s 0m0.056s
[~nova] $ ls 1685
total 484
-rw-r--r-- 1 lin lin 166555 Jan 15 13:51 1685.1.rp
-rw-r--r-- 1 lin lin 161548 Jan 15 13:51 1685.2.rp
-rw-r--r-- 1 lin lin 161035 Jan 15 13:51 1685.3.rp
```

R main.sh => accendo.sh +42 -7
@@ 1,14 1,49 @@
#!/bin/bash
# Positional arguments: $1 is the post ID (or a URL containing it) and $2 is
# an optional target directory.
POST_ID=$1
TARGET_DIRECTORY=$2
# Configuration: the pieces of the request URL and the per_page value sent
# with each request.
BASE_PROTO=https
BASE_HOST=www.glowfic.com
BASE_PATH=posts
PAGE_SIZE=100
# NOTE(review): this value is unconditionally overwritten by the readlink-based
# assignment a few lines below, so it has no effect as written.
SCRIPT_DIR=$PWD

# https://stackoverflow.com/questions/59895/getting-the-source-directory-of-a-bash-script-from-within#comment49538168_246128
# Directory containing this script, with symlinks in $0 resolved by
# `readlink -f`; the curl calls later in the script look for the
# curl_parameters file here.
SCRIPT_DIR=$( dirname "$(readlink -f "$0")" )

# Without a first argument there is nothing to download: print usage and fail.
if [ -z "$POST_ID" ]; then
    echo "Usage: $0 POST_ID [TARGET_DIRECTORY]"
    echo "This script downloads glowfic by ID or URL to TARGET_DIRECTORY, or ./POST_ID if the directory is not specified."
    exit 1;
fi

# Print the shell's accumulated user/system times whenever the script exits
# (including the error exits below).
trap times EXIT;
# NOTE(review): this creates and enters a directory named after the raw first
# argument BEFORE it is validated or reduced to a number, and the expansion is
# unquoted. The default TARGET_DIRECTORY computed further down ($PWD/$POST_ID)
# is then relative to this directory. Possibly a leftover from the version
# before the target-directory handling was added -- confirm.
mkdir -p ${POST_ID}; cd ${POST_ID}

# Accept either a bare numeric ID or a URL. If the argument is not all digits,
# replace it with the first run of digits that directly follows a "/".
re='^[0-9]+$'
if ! [[ $POST_ID =~ $re ]] ; then
    POST_ID=$( echo $POST_ID | perl -ne '/\/(\d+)/; print $1' )
fi
# If extraction did not yield a number (POST_ID may now be empty), give up.
if ! [[ $POST_ID =~ $re ]] ; then
    echo "Post ID ${POST_ID} not a number!"
    exit 1
fi

# Choose the working directory for the download. An existing directory given
# as TARGET_DIRECTORY is used as-is; otherwise (argument missing, or not an
# existing directory) TARGET_DIRECTORY is replaced by $PWD/$POST_ID, which is
# created if needed.
# NOTE(review): a TARGET_DIRECTORY that was supplied but does not exist yet is
# silently replaced rather than created; the unquoted expansions below will
# also word-split paths containing whitespace.
if [ -d "$TARGET_DIRECTORY" ]; then
    cd ${TARGET_DIRECTORY}
else
    TARGET_DIRECTORY=$PWD/$POST_ID
    mkdir -p ${TARGET_DIRECTORY}
    cd ${TARGET_DIRECTORY}
fi

# Fetch the first page of the post into ${POST_ID}.1.rp in the current
# directory, passing curl the config file $SCRIPT_DIR/curl_parameters via -K.
echo -n "Retrieving page 1 ... "
# NOTE(review): both curl calls below write the same output file. The first
# uses a hard-coded URL, the second builds the URL from BASE_PROTO, BASE_HOST
# and BASE_PATH. The first looks like the pre-change line of the diff; as
# written the page is requested twice -- confirm which line is intended.
curl -K $SCRIPT_DIR/curl_parameters -o ${POST_ID}.1.rp "https://www.glowfic.com/posts/${POST_ID}?per_page=${PAGE_SIZE}"
curl -K $SCRIPT_DIR/curl_parameters -o ${POST_ID}.1.rp "${BASE_PROTO}://${BASE_HOST}/${BASE_PATH}/${POST_ID}?per_page=${PAGE_SIZE}"
if [ -f ${POST_ID}.1.rp ]; then
    echo "Done!"
    echo -n "Generating index ... "
    if fgrep -q Glowfic ${POST_ID}.1.rp; then
        echo "Done!"
        echo -n "Generating index ... "
    else
        echo "Glowfic with post ID ${POST_ID} does not exist! See ${TARGET_DIRECTORY} for details."
        exit 1
    fi
else
    echo "Failed!"
    exit 1


@@ 18,7 53,7 @@ if (( MAX_PAGE \> 1 )); then
    seq 2 ${MAX_PAGE} > ${POST_ID}.index
    echo "Done!"
    echo "Retrieving pages ... "
    xargs -a ${POST_ID}.index -P 4 -I {} -t curl -K $SCRIPT_DIR/curl_parameters -o ${POST_ID}.{}.rp "https://www.glowfic.com/posts/${POST_ID}?per_page=${PAGE_SIZE}&page={}"
    xargs -a ${POST_ID}.index -P 4 -I {} -t curl -K $SCRIPT_DIR/curl_parameters -o ${POST_ID}.{}.rp "${BASE_PROTO}://${BASE_HOST}/${BASE_PATH}/${POST_ID}?per_page=${PAGE_SIZE}&page={}"
    echo "Done!"
    echo -n "Verifying pages ..."
    for PAGE in `seq 1 ${MAX_PAGE}`; do


@@ 38,4 73,4 @@ else
fi
# Report the total size of the current directory: `du -ch` ends with a grand
# total line, and `cut -f 1` keeps only its size column.
echo -n "Downloaded size: "
du -ch . | tail -n 1 | cut -f 1
# NOTE(review): two completion messages are printed back to back. The first
# looks like the pre-change line of the diff and the second (which also names
# TARGET_DIRECTORY) like its replacement -- confirm.
echo "Completed retrieval of ${POST_ID}!"
echo "Completed retrieval of ${POST_ID} to ${TARGET_DIRECTORY}!"