gitweb.fluxo.info Git - ckandumper.git/commitdiff
Dump stats for each batch
author Silvio Rhatto <rhatto@riseup.net>
Thu, 16 May 2019 12:06:41 +0000 (09:06 -0300)
committer Silvio Rhatto <rhatto@riseup.net>
Thu, 16 May 2019 12:06:41 +0000 (09:06 -0300)
ckandumper

index affaf24b63631a9793b6e01a803b35e2c8d970e8..5640dee3a86908f34a22d9c5f46326d4b81eedf5 100755 (executable)
@@ -114,7 +114,7 @@ class DownloadMultiple:
         await asyncio.gather(*jobs)
 
     def get(self, filepairs):
-        self.stats = { 'exitstatus': {} }
+        self.stats =  { 'exitstatus': {} }
         self.bar   = tqdm(total=len(filepairs)) if self.progress and len(filepairs) > 1 else False
         loop       = asyncio.get_event_loop()
 
@@ -168,6 +168,16 @@ class CkanDumper:
         descriptor.close()
         return data
 
+    def dump_stats(self, stats):
+        """Dump download batch statistics"""
+        if stats != None:
+            print('Statistics (exit status / total downloads): ', end='')
+
+            for status in stats['exitstatus']:
+                print(status + ': ' + str(len(stats['exitstatus'][status])), end='; ')
+
+            print('')
+
     def dump(self):
         """Downloads all content listed in a CKAN repository"""
         package_list = self.dest + os.sep + 'package_list.json'
@@ -178,7 +188,7 @@ class CkanDumper:
         # Groups
         #
         print(f'Downloading {self.url}{self.group_list}...')
-        status = self.download.get([[self.url + self.group_list, group_list]])
+        stats = self.download.get([[self.url + self.group_list, group_list]])
 
         groups          = self.load_json(group_list)
         group_downloads = []
@@ -190,13 +200,14 @@ class CkanDumper:
 
             group_downloads.append([self.url + self.group_show + urlencode({ 'id': group }, False, '', 'utf-8'), group_file])
 
-        status = self.download.get(group_downloads)
+        stats = self.download.get(group_downloads)
+        self.dump_stats(stats)
 
         #
         # Tags
         #
         print(f'Downloading {self.url}{self.tag_list}...')
-        status = self.download.get([[self.url + self.tag_list, tag_list]])
+        stats = self.download.get([[self.url + self.tag_list, tag_list]])
 
         tags           = self.load_json(tag_list)
         tags_downloads = []
@@ -208,13 +219,13 @@ class CkanDumper:
 
             tags_downloads.append([self.url + self.tag_show + urlencode({ 'id': tag }, False, '', 'utf-8'), tag_file])
 
-        status = self.download.get(tags_downloads)
+        stats = self.download.get(tags_downloads)
 
         #
         # Packages
         #
         print(f'Downloading {self.url}{self.package_list}...')
-        status = self.download.get([[self.url + self.package_list, package_list]])
+        stats = self.download.get([[self.url + self.package_list, package_list]])
 
         packages           = self.load_json(package_list)
         packages_downloads = []
@@ -227,7 +238,7 @@ class CkanDumper:
 
             packages_downloads.append([self.url + self.package_show + urlencode({ 'id': package }, False, '', 'utf-8'), package_file])
 
-        status = self.download.get(packages_downloads)
+        stats = self.download.get(packages_downloads)
 
         #
         # Package contents
@@ -256,7 +267,7 @@ class CkanDumper:
 
                 package_downloads.append([resource['url'], resource_file])
 
-            status = self.download.get(package_downloads)
+            stats = self.download.get(package_downloads)
 
 if __name__ == "__main__":
     # Parse CLI