gitweb.fluxo.info Git - ckandumper.git/commitdiff
Support for custom wget invocations
author Silvio Rhatto <rhatto@riseup.net>
Wed, 22 May 2019 17:04:07 +0000 (14:04 -0300)
committer Silvio Rhatto <rhatto@riseup.net>
Wed, 22 May 2019 17:04:07 +0000 (14:04 -0300)
ckandumper

index 237d9613a72c02aaf0b554e3a34c317b31f7a92b..21a922beb022586edbb2477fae9768b80257ccea 100755 (executable)
@@ -24,7 +24,7 @@ import datetime
 import random
 import asyncio
 import argparse
-import sys, os, json
+import sys, os, subprocess, json
 from urllib.parse import urlencode
 from hashlib import sha256
 from tqdm import tqdm
@@ -33,8 +33,15 @@ class DownloadMultiple:
     """Downloads multiple files simultaneously with error logging and fancy output"""
 
     def __init__(self, limit_rate, limit_concurrent = 20, progress = True, debug = False, wget = '/usr/bin/wget'):
-        if not os.path.exists(wget):
-            raise FileNotFoundError('Wget not found in path ' + wget + '; please install it first.')
+        # Check for wget
+        wget_bin = wget.split(' ')[0]
+        if '/' in wget_bin and not os.path.exists(wget_bin):
+            raise FileNotFoundError('Wget not found in path ' + wget_bin + '; please install it first.')
+        else:
+            result = subprocess.check_call(wget_bin + ' --help', stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, shell=True)
+            #result = subprocess.call(wget_bin + ' --help', shell=True, stdout='/dev/null', stderr='/dev/null')
+            #if result == 127:
+            #    raise FileNotFoundError('Wget not found in path ' + wget_bin + '; please install it first.')
 
         self.limit_rate       = limit_rate
         self.limit_concurrent = asyncio.Semaphore(int(limit_concurrent))
@@ -325,13 +332,14 @@ if __name__ == "__main__":
 
       ckandumper --limit-concurrent=10 --limit-rate=100k --randomize https://open.canada.ca/data/en/ canada/
       ckandumper --limit-concurrent=10 --limit-rate=100k --randomize https://opendata.swiss/en/ switzerland/
+      ckandumper --limit-concurrent=10 --wget="wget --no-check-certificate" --randomize http://dados.gov.br
     """
     parser    = argparse.ArgumentParser(description='Dump CKAN metadata and datasets.', epilog=examples, formatter_class=argparse.RawDescriptionHelpFormatter,)
     parser.add_argument('url',                nargs='+',                                help='CKAN instance URL')
     parser.add_argument('dest',               nargs='+',                                help='Destination folder')
-    parser.add_argument('--limit-rate',                                                 help='Limit the download speed to amount bytes per second, per download')
+    parser.add_argument('--limit-rate',                                                 help='Limit the download speed to amount bytes per second, per download, shorthand for "--wget="wget --limit-rate"')
     parser.add_argument('--limit-concurrent',                                           help='Limit the total concurrent downloads')
-    parser.add_argument('--wget',                                                       help='Path of custom wget implementation')
+    parser.add_argument('--wget',                                                       help='Custom wget invocation')
     parser.add_argument('--debug',            dest='debug',     action='store_true',    help='Enable debug')
     parser.add_argument('--no-debug',         dest='debug',     action='store_false',   help='Disable debug')
     parser.add_argument('--progress',         dest='progress',  action='store_true',    help='Enable progress')
@@ -360,3 +368,6 @@ if __name__ == "__main__":
     except KeyboardInterrupt as e:
         print(e)
         exit(1)
+    except CalledProcessError as e:
+        print(e)
+        exit(1)