gitweb.fluxo.info Git - ckandumper.git/commitdiff
Dump global stats to a file
author Silvio Rhatto <rhatto@riseup.net>
Thu, 16 May 2019 12:15:37 +0000 (09:15 -0300)
committer Silvio Rhatto <rhatto@riseup.net>
Thu, 16 May 2019 12:15:37 +0000 (09:15 -0300)
ckandumper

index 5640dee3a86908f34a22d9c5f46326d4b81eedf5..d9f179b1e45c271338f03e5a5da33e497978800d 100755 (executable)
@@ -37,6 +37,7 @@ class DownloadMultiple:
         self.progress         = progress
         self.debug            = debug
         self.wget             = wget
+        self.globalstats      =  { 'exitstatus': {} }
 
     def ensuredir(self, dest):
         """Ensures that the destination folder exists"""
@@ -87,6 +88,11 @@ class DownloadMultiple:
             output.write(str(proc.returncode) + '\n')
             output.close()
 
+            if not str(proc.returncode) in self.globalstats['exitstatus']:
+                self.globalstats['exitstatus'][str(proc.returncode)] = []
+
+            self.globalstats['exitstatus'][str(proc.returncode)].append(url);
+
             if not str(proc.returncode) in self.stats['exitstatus']:
                 self.stats['exitstatus'][str(proc.returncode)] = []
 
@@ -178,6 +184,16 @@ class CkanDumper:
 
             print('')
 
+    def write_stats(self):
+        """Write the global download stats as JSON to ckandumper.stats.json under self.dest"""
+        with open(self.dest + os.sep + 'ckandumper.stats.json', 'w') as stats:  # closed (and flushed) on exit
+            stats.write(json.dumps(self.download.globalstats, indent=2) + '\n')
+
+    def process_stats(self, stats):
+        """Process one run's stats: hand them to dump_stats, then rewrite the global stats file"""
+        self.dump_stats(stats)
+        self.write_stats()
+
     def dump(self):
         """Downloads all content listed in a CKAN repository"""
         package_list = self.dest + os.sep + 'package_list.json'
@@ -201,7 +217,7 @@ class CkanDumper:
             group_downloads.append([self.url + self.group_show + urlencode({ 'id': group }, False, '', 'utf-8'), group_file])
 
         stats = self.download.get(group_downloads)
-        self.dump_stats(stats)
+        self.process_stats(stats)
 
         #
         # Tags
@@ -220,6 +236,7 @@ class CkanDumper:
             tags_downloads.append([self.url + self.tag_show + urlencode({ 'id': tag }, False, '', 'utf-8'), tag_file])
 
         stats = self.download.get(tags_downloads)
+        self.process_stats(stats)
 
         #
         # Packages
@@ -239,6 +256,7 @@ class CkanDumper:
             packages_downloads.append([self.url + self.package_show + urlencode({ 'id': package }, False, '', 'utf-8'), package_file])
 
         stats = self.download.get(packages_downloads)
+        self.process_stats(stats)
 
         #
         # Package contents
@@ -268,6 +286,7 @@ class CkanDumper:
                 package_downloads.append([resource['url'], resource_file])
 
             stats = self.download.get(package_downloads)
+            self.process_stats(stats)
 
 if __name__ == "__main__":
     # Parse CLI