gitweb.fluxo.info Git - httruta.git/commitdiff
Usage and cleaner httrack options
authorSilvio Rhatto <rhatto@riseup.net>
Mon, 26 Aug 2013 00:52:43 +0000 (21:52 -0300)
committerSilvio Rhatto <rhatto@riseup.net>
Mon, 26 Aug 2013 00:52:43 +0000 (21:52 -0300)
README.mdwn
config
lib/httracker/functions

index e9c9d7212a1681f25d2965febf16dffdf08f98d9..3309f60cdb589f68587baddd731a618cc054b775 100644 (file)
@@ -4,8 +4,16 @@ Feed Crawler
 Download all links from a feed using httrack. This is the engine behind the
 "Cache" feature used by https://links.sarava.org Semantic Scuttle instance.
 
+Usage
+-----
+
+Place this script somewhere and set up a cronjob like this:
+
+`*/5 * * * * /var/sites/arquivo/httracker/httracker &> /dev/null`
+
 TODO
 ----
 
 - Include all sites already downloaded by scuttler.
 - Support for other fetchers like youtube-dl.
+- Lockfile support.
diff --git a/config b/config
index 828cfc17c0a191d1a8056c2d49e3fe39324b3fbf..f692713c0c0c48f43786476f478dde369a0b26d9 100644 (file)
--- a/config
+++ b/config
@@ -4,9 +4,8 @@ FEED="https://links.sarava.org/rss?sort=date_desc&count=100"
 TMP="/var/sites/arquivo/tmp/httracker"
 URLS="$TMP/urls-httracker.txt"
 URLS_SCUTTLER="$TMP/urls-scuttler.txt"
-LEVEL="1"
-EXT_LEVEL="1"
 FILESIZE=""
 USER="arquivo"
 GROUP="arquivo"
-DEPTH="1"
+DEPTH="2"
+EXT_DEPTH="1"
index a5144c90649aaf7a51d0acb4d2d0c5a04db7edc2..33152b1cca1d178b6c0a360bdc352178419535fa 100644 (file)
@@ -26,19 +26,17 @@ function httracker_get {
   fi
 
   # Get each URL
-  httrack               \
-    --mirror            \
-    --continue          \
-    --depth=${DEPTH}    \
-    --near              \
-    --purge-old=0       \
-    --index             \
-    --cookies=1         \
-    --path ${target}    \
-    -r${LEVEL} ${OPTS} ${url}
-    #-e%${EXT_LEVEL}    \
-    #-m$FILESIZE        \
-    #--verbose
+  httrack                    \
+    --mirror                 \
+    --continue               \
+    --depth=${DEPTH}         \
+    --ext-depth ${EXT_DEPTH} \
+    --near                   \
+    --purge-old=0            \
+    --index                  \
+    --cookies=1              \
+    --path ${target}         \
+    ${OPTS} ${url}
 
   if [ "$?" == "0" ]; then
     # Mark as downloaded
@@ -64,16 +62,17 @@ function httracker_get_incremental {
   fi
 
   # Grabs URLs from the network
-  httrack --verbose           \
-          --mirror            \
-          --continue          \
-          --user links        \
-          --depth=${DEPTH}    \
-          --near              \
-          --purge-old=0       \
-          --index             \
-          --cookies=1         \
-          --list ${URLS}      \
+  httrack                          \
+          --mirror                 \
+          --continue               \
+          --depth=${DEPTH}         \
+          --ext-depth ${EXT_DEPTH} \
+          --near                   \
+          --purge-old=0            \
+          --index                  \
+          --cookies=1              \
+          --user links             \
+          --list ${URLS}           \
           --path ${target} ${OPTS}
 
 }