gitweb.fluxo.info Git - ckandumper.git/commitdiff
Coding style
author      Silvio Rhatto <rhatto@riseup.net>
            Mon, 13 May 2019 13:56:38 +0000 (10:56 -0300)
committer   Silvio Rhatto <rhatto@riseup.net>
            Mon, 13 May 2019 13:56:38 +0000 (10:56 -0300)
ckandumper

index 0d3bd315f94780f9d5d8a25bf29bb2c3b4e40c0a..3f8f08e3f9bc3b7c424e1891c9083c1a641c572c 100755 (executable)
@@ -24,117 +24,119 @@ import sys, os, subprocess, pycurl, json
 from urllib.parse import urlencode
 
 class ckandumper:
-  """Dumps CKAN data: metadata plus entire datasets"""
-
-  def __init__(self, args):
-    self.url          = args.url[0]
-    self.dest         = args.dest[0]
-    self.package_list = '/api/3/action/package_list'
-    self.package_show = '/api/3/action/package_show?'
-    self.group_list   = '/api/3/action/group_list'
-    self.group_show   = '/api/3/action/group_show?'
-    self.tag_list     = '/api/3/action/tag_list'
-    self.tag_show     = '/api/3/action/tag_show?'
-
-    if args.limit_rate != None:
-        self.limit_rate = '--limit-rate=' + args.limit_rate
-
-  # Using wget as it is more reliable
-  def download(self, url, local_filename):
-    subprocess.call('/usr/bin/wget ' + self.limit_rate + ' -c -O "' + local_filename + '" ' + url, shell=True)
-
-  def ensuredir(self, dest):
-    # Ensure that the destination folder exists
-    if not os.path.exists(dest) and not os.path.isdir(dest):
-      os.makedirs(dest, 0o755);
-    elif os.path.exists(dest) and not os.path.isdir(dest):
-      raise ValueError('File exists and is not a folder:' + dest)
-
-  def loadJSON(self, file):
-    descriptor = open(file)
-    data       = json.load(descriptor)
-    file.close()
-
-  def dump(self):
-    self.ensuredir(self.dest)
-
-    # Move to dest folder
-    #os.chdir(self.dest)
-
-    package_list = self.dest + os.sep + 'package_list.json'
-    group_list   = self.dest + os.sep + 'group_list.json'
-    tag_list     = self.dest + os.sep + 'tag_list.json'
-
-    #
-    # Groups
-    #
-    self.download(self.url + self.group_list, group_list)
-    groups = self.loadJSON(group_list)
-
-    for group in groups['result']:
-      group_folder = self.dest + os.sep + 'groups' + os.sep + group
-      group_file   = group_folder + os.sep + 'group.json'
-      self.ensuredir(group_folder)
-      print("Downloading " + self.url + self.group_show + 'id=' + group + '...')
-      self.download(self.url + self.group_show + urlencode({ 'id': group }, False, '', 'utf-8'), group_file)
-
-    #
-    # Tags
-    #
-    self.download(self.url + self.tag_list, tag_list)
-    tags = self.loadJSON(tag_list)
-
-    for tag in tags['result']:
-      tag_folder = self.dest + os.sep + 'tags' + os.sep + tag
-      tag_file   = tag_folder + os.sep + 'tag.json'
-      self.ensuredir(tag_folder)
-      print("Downloading " + self.url + self.tag_show + 'id=' + tag + '...')
-      self.download(self.url + self.tag_show + urlencode({ 'id': tag }, False, '', 'utf-8'), tag_file)
-
-    #
-    # Packages
-    #
-    self.download(self.url + self.package_list, package_list)
-    packages = self.loadJSON(package_list)
-
-    for package in packages['result']:
-      package_folder = self.dest + os.sep + 'packages' + os.sep + package
-      package_file   = package_folder + os.sep + 'package.json'
-      self.ensuredir(package_folder + os.sep + 'data')
-      print("Downloading " + self.url + self.package_show + 'id=' + package + '...')
-      self.download(self.url + self.package_show + urlencode({ 'id': package }, False, '', 'utf-8'), package_file)
-
-      contents = self.loadJSON(package_file)
-
-      for resource in contents['result']['resources']:
-        #if resource['name'] != None: 
-        #  name = resource['name']
-        #else
-        #  name = resource['id']
-
-        name = resource['id']
-
-        if resource['format'] != None:
-          format = '.' + resource['format'].lower()
-        else:
-          format = ''
-
-        resource_file = package_folder + os.sep + 'data' + os.sep + name + format
-
-        self.download(resource['url'], resource_file)
-
-      # Run only once during development
-      #return
+    """Dumps CKAN data: metadata plus entire datasets"""
+
+    def __init__(self, args):
+        self.url          = args.url[0]
+        self.dest         = args.dest[0]
+        self.package_list = '/api/3/action/package_list'
+        self.package_show = '/api/3/action/package_show?'
+        self.group_list   = '/api/3/action/group_list'
+        self.group_show   = '/api/3/action/group_show?'
+        self.tag_list     = '/api/3/action/tag_list'
+        self.tag_show     = '/api/3/action/tag_show?'
+
+        if args.limit_rate != None:
+            self.limit_rate = '--limit-rate=' + args.limit_rate
+
+    # Using wget as it is more reliable
+    def download(self, url, local_filename):
+        subprocess.call('/usr/bin/wget ' + self.limit_rate + ' -c -O "' + local_filename + '" ' + url, shell=True)
+
+    def ensuredir(self, dest):
+        # Ensure that the destination folder exists
+        if not os.path.exists(dest) and not os.path.isdir(dest):
+            os.makedirs(dest, 0o755);
+        elif os.path.exists(dest) and not os.path.isdir(dest):
+            raise ValueError('File exists and is not a folder:' + dest)
+
+    def loadJSON(self, file):
+        descriptor = open(file)
+        data       = json.load(descriptor)
+        file.close()
+
+    def dump(self):
+        self.ensuredir(self.dest)
+
+        # Move to dest folder
+        #os.chdir(self.dest)
+
+        package_list = self.dest + os.sep + 'package_list.json'
+        group_list   = self.dest + os.sep + 'group_list.json'
+        tag_list     = self.dest + os.sep + 'tag_list.json'
+
+        #
+        # Groups
+        #
+        self.download(self.url + self.group_list, group_list)
+        groups = self.loadJSON(group_list)
+
+        for group in groups['result']:
+            group_folder = self.dest + os.sep + 'groups' + os.sep + group
+            group_file   = group_folder + os.sep + 'group.json'
+
+            self.ensuredir(group_folder)
+            print("Downloading " + self.url + self.group_show + 'id=' + group + '...')
+            self.download(self.url + self.group_show + urlencode({ 'id': group }, False, '', 'utf-8'), group_file)
+        #
+        # Tags
+        #
+        self.download(self.url + self.tag_list, tag_list)
+        tags = self.loadJSON(tag_list)
+
+        for tag in tags['result']:
+            tag_folder = self.dest + os.sep + 'tags' + os.sep + tag
+            tag_file   = tag_folder + os.sep + 'tag.json'
+
+            self.ensuredir(tag_folder)
+            print("Downloading " + self.url + self.tag_show + 'id=' + tag + '...')
+            self.download(self.url + self.tag_show + urlencode({ 'id': tag }, False, '', 'utf-8'), tag_file)
+
+        #
+        # Packages
+        #
+        self.download(self.url + self.package_list, package_list)
+        packages = self.loadJSON(package_list)
+
+        for package in packages['result']:
+            package_folder = self.dest + os.sep + 'packages' + os.sep + package
+            package_file   = package_folder + os.sep + 'package.json'
+
+            self.ensuredir(package_folder + os.sep + 'data')
+            print("Downloading " + self.url + self.package_show + 'id=' + package + '...')
+            self.download(self.url + self.package_show + urlencode({ 'id': package }, False, '', 'utf-8'), package_file)
+
+            contents = self.loadJSON(package_file)
+
+            for resource in contents['result']['resources']:
+                #if resource['name'] != None:
+                #  name = resource['name']
+                #else
+                #  name = resource['id']
+
+                name = resource['id']
+
+                if resource['format'] != None:
+                    format = '.' + resource['format'].lower()
+                else:
+                    format = ''
+
+                resource_file = package_folder + os.sep + 'data' + os.sep + name + format
+
+                self.download(resource['url'], resource_file)
+
+            # Run only once during development
+            #return
 
 # Standalone usage
 if __name__ == "__main__":
-  # Parse CLI
-  parser = argparse.ArgumentParser(description='Dump CKAN metadata and datasets.')
-  parser.add_argument('url',  nargs='+', help='CKAN instance URL')
-  parser.add_argument('dest', nargs='+', help='Destination folder')
-  parser.add_argument("--limit-rate",    help="Limit the download speed to amount bytes per second, per download")
-  args = parser.parse_args()
-
-  # Dispatch
-  ckan = ckandumper(args)
-  ckan.dump()
+    # Parse CLI
+    parser = argparse.ArgumentParser(description='Dump CKAN metadata and datasets.')
+    parser.add_argument('url',  nargs='+', help='CKAN instance URL')
+    parser.add_argument('dest', nargs='+', help='Destination folder')
+    parser.add_argument("--limit-rate",    help="Limit the download speed to amount bytes per second, per download")
+    args = parser.parse_args()
+
+    # Dispatch
+    ckan = ckandumper(args)
+    ckan.dump()
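
For reference, a minimal sketch of driving the dumper from Python rather than from the shell, mirroring what the standalone block above does. The module name, instance URL, destination folder, and rate limit are illustrative assumptions, not values taken from this commit; a rate limit is supplied because download() reads self.limit_rate unconditionally.

    # Sketch only: assumes the script is importable as a module named "ckandumper".
    from argparse import Namespace
    from ckandumper import ckandumper

    args = Namespace(url=['https://demo.ckan.org'],  # hypothetical CKAN instance
                     dest=['dump'],                   # hypothetical destination folder
                     limit_rate='100k')               # forwarded to wget --limit-rate
    ckandumper(args).dump()

Under the same assumptions, the equivalent command-line call would be ./ckandumper --limit-rate=100k https://demo.ckan.org dump, with the script marked executable as the file mode above indicates.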