gitweb.fluxo.info Git - httruta.git/commitdiff
Adding httracker
author Silvio Rhatto <rhatto@riseup.net>
Sun, 25 Aug 2013 19:32:09 +0000 (16:32 -0300)
committer Silvio Rhatto <rhatto@riseup.net>
Sun, 25 Aug 2013 19:32:09 +0000 (16:32 -0300)
httracker [new file with mode: 0755]

diff --git a/httracker b/httracker
new file mode 100755 (executable)
index 0000000..95701b1
--- /dev/null
+++ b/httracker
@@ -0,0 +1,67 @@
+#!/bin/bash
+#
+# Httrack feed downloader
+#
+
+# Configuration
+MIRRORS="/var/cache/arquivo/conteudo/links.sarava.org/assets"  # base directory; one sub-directory per URL hash
+FEED="https://links.sarava.org/rss?sort=date_desc&count=50"  # RSS feed fetched with curl
+TMP="/tmp/httracker"  # scratch directory created at startup
+URLS="$TMP/urls.txt"  # file receiving the links extracted from the feed
+LEVEL="1"  # depth value handed to httrack
+EXT_LEVEL="1"  # external-links level handed to httrack
+FILESIZE=""  # not used by the httrack call at present
+USER="arquivo"  # account name passed to httrack and to chown
+GROUP="arquivo"  # group name passed to chown for finished mirrors
+
+function httracker_get {
+  local url="$1"
+  local hash="`echo $1 | sha1sum | cut -d ' ' -f 1`"
+  local target="$MIRRORS/$hash"
+
+  mkdir -p $target
+
+  # We already got this one
+  if [ -f "$target/ok" ]; then
+    return
+  fi
+
+  # Get each URL
+  httrack --verbose     \
+    --user $USER        \
+    --depth=1           \
+    --purge-old=0       \
+    --index             \
+    --cookies=1         \
+    --path ${target}    \
+    -r$LEVEL            \ 
+    -e%$EXT_LEVEL       \
+    #-m$FILESIZE         \
+    "$url"
+
+  if [ "$1" == "0" ]; then
+    # Mark as downloaded
+    touch $target/ok
+    chown -R $USER.$GROUP $target/
+  else
+    echo "Error fetching $url."
+    rm -rf $target
+  fi
+}
+
+# Create folders
+mkdir $MIRRORS $TMP
+
+# Get URL
+# Thanks http://stackoverflow.com/questions/443991/how-to-parse-rss-feeds-xml-in-a-shell-script
+curl $FEED | grep -o '<link>[^<]*' | grep -o "[^>]*$" > $URLS
+
+if [ "$?" != "0" ]; then
+  echo "Error downloading feed $FEED, aborting."
+  exit 1
+fi
+
+# Iterate over all URLs
+for link in `cat $URLS | xargs`; do
+  httracker_get $link
+done