@@ 2,7 2,7 @@
date=`date -u "+%Y-%m-%d"`
mkdir -p articles
-touch seen-article-urls
+touch all-article-urls seen-article-urls
printf "Downloading feed files:\n"
while read feed_url
@@ 15,7 15,7 @@ do
done < feed-urls
# for lines beginning with one '+', remove the '+' and print them
-diff seen-article-urls all-article-urls | sed -n '/^+[^+]/ s/^+//p' > new-article-urls
+diff -u seen-article-urls all-article-urls | sed -n '/^+[^+]/ s/^+//p' > new-article-urls
printf "\nExtracted new article URLs\n\n"
printf "Downloading articles and converting them:\n"
@@ 25,8 25,8 @@ do
# Use Mozilla's readability.js to extract content
node make-readable.js temp.html > readable.html
# Get title from first line of readable.html
- # and change any '/' to '|' for use as a filename
- title=`head -1 readable.html | sed -e 's/\//|/g'`
+ # and change any '/' to ' of ' and any ':' to ' - ' for use as a filename
+ title=`head -1 readable.html | sed -e 's/\// of /g; s/:/ - /g'`
mv readable.html "articles/$date - $title.html"
printf "Got $title\n"
done < new-article-urls