Download all links from a feed using httrack. This is the engine behind the
"Cache" feature of the Semantic Scuttle instance at https://links.sarava.org.
+Usage
+-----
+
+Place this script somewhere and set up a cronjob like this:
+
+`*/5 * * * * /var/sites/arquivo/httracker/httracker > /dev/null 2>&1`
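
The script must be executable, and the cron entry should belong to a user able to write to the paths configured below (the script defines an `arquivo` user and group). Assuming that user is meant to own the job, a minimal setup could be:

`chmod +x /var/sites/arquivo/httracker/httracker`

`crontab -u arquivo -e`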
+
TODO
----
- Include all sites already downloaded by scuttler.
- Support for other fetchers like youtube-dl.
+- Lockfile support (see the sketch after this list).
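
One way to address the lockfile item, sketched here only as an idea, assuming bash and the util-linux `flock(1)` utility, and reusing the `$TMP` directory configured in the script, would be to exit early when another run still holds the lock:

    # Sketch: abort if a previous run is still active.
    LOCKFILE="$TMP/httracker.lock"
    exec 200> "$LOCKFILE"
    if ! flock --nonblock 200; then
      echo "httracker: another instance is already running, exiting." >&2
      exit 1
    fi
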
TMP="/var/sites/arquivo/tmp/httracker"
URLS="$TMP/urls-httracker.txt"
URLS_SCUTTLER="$TMP/urls-scuttler.txt"
-LEVEL="1"
-EXT_LEVEL="1"
FILESIZE=""
USER="arquivo"
GROUP="arquivo"
-DEPTH="1"
+DEPTH="2"
+EXT_DEPTH="1"
fi
# Get each URL
- httrack \
- --mirror \
- --continue \
- --depth=${DEPTH} \
- --near \
- --purge-old=0 \
- --index \
- --cookies=1 \
- --path ${target} \
- -r${LEVEL} ${OPTS} ${url}
- #-e%${EXT_LEVEL} \
- #-m$FILESIZE \
- #--verbose
+ httrack \
+ --mirror \
+ --continue \
+ --depth=${DEPTH} \
+ --ext-depth ${EXT_DEPTH} \
+ --near \
+ --purge-old=0 \
+ --index \
+ --cookies=1 \
+ --path ${target} \
+ ${OPTS} ${url}
if [ "$?" == "0" ]; then
# Mark as downloaded
fi
# Grab URLs from the network
- httrack --verbose \
- --mirror \
- --continue \
- --user links \
- --depth=${DEPTH} \
- --near \
- --purge-old=0 \
- --index \
- --cookies=1 \
- --list ${URLS} \
+ httrack \
+ --mirror \
+ --continue \
+ --depth=${DEPTH} \
+ --ext-depth ${EXT_DEPTH} \
+ --near \
+ --purge-old=0 \
+ --index \
+ --cookies=1 \
+ --user links \
+ --list ${URLS} \
--path ${target} ${OPTS}
}
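
# Note (illustrative): httrack's --list option reads URLs from a plain text
# file, one URL per line, so the $URLS file consumed above is expected to
# contain something like:
#
#   http://example.org/some-article
#   http://example.net/post/123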