Feat: adds --check option for the test suite

author Silvio Rhatto <rhatto@riseup.net>

Fri, 29 Jan 2021 01:10:13 +0000 (22:10 -0300)

committer Silvio Rhatto <rhatto@riseup.net>

Fri, 29 Jan 2021 01:10:13 +0000 (22:10 -0300)
author Silvio Rhatto <rhatto@riseup.net>
Fri, 29 Jan 2021 01:10:13 +0000 (22:10 -0300)
committer Silvio Rhatto <rhatto@riseup.net>
Fri, 29 Jan 2021 01:10:13 +0000 (22:10 -0300)
diff --git a/Makefile b/Makefile

index 1a13c569019c55b321272bd9d15e78941fc43068..8732717eff141eee10a7f7895e5e9025aff91bd1 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -19,7 +19,7 @@ sample:
         pipenv run ./csv-sampler.py --iterations $(SAMPLE_ITERATIONS) --rows_per_iteration $(SAMPLE_ROWS_PER_ITERATION) $(SAMPLE)
  
  test-sample:
-       pipenv run ./csv-hasher.py --chunksize $(CHUNKSIZE) $(SAMPLE) $(OUTPUT) $(COLNAME)
+       pipenv run ./csv-hasher.py --check --chunksize $(CHUNKSIZE) $(SAMPLE) $(OUTPUT) $(COLNAME)
  
  show-test-output:
         head -$(CHECK_LINES) $(SAMPLE)
diff --git a/csv-hasher.py b/csv-hasher.py

index bdd9950acae68b1c64f03905040f6f236dc0ce5d..c07adb0b7214e3a9eb70c89fdcb92b1be949bd3d 100755 (executable)
--- a/csv-hasher.py
+++ b/csv-hasher.py
@@ -151,6 +151,21 @@ class CsvHasher:
          if hasattr(progress_bar, 'close'):
              progress_bar.close()
  
+    def check(self):
+        """Check both files for differences"""
+
+        df_infile  = pd.read_csv(self.args.infile[0],  sep=self.args.sep)
+        df_outfile = pd.read_csv(self.args.outfile[0], sep=self.args.sep)
+
+        print('Comparing both files without excluding the ' + self.args.colname[0] + ' column:')
+        print(df_infile.compare(df_outfile))
+
+        del df_infile[self.args.colname[0]]
+        del df_outfile[self.args.colname[0]]
+
+        print('Comparing both files excluding the ' + self.args.colname[0] + ' column:')
+        print(df_infile.compare(df_outfile))
+
  def cmdline():
      """
      Evalutate the command line.
@@ -185,11 +200,15 @@ def cmdline():
      parser.add_argument('--no-progress', dest='progress', action='store_false',
                          help='Disable progress bar.')
  
+    parser.add_argument('--check', dest='check', action='store_true',
+                        help='Check both files for differences (test suite), defaults to false.')
+
      # Add default values and get args
      parser.set_defaults(sep=',')
      parser.set_defaults(chunksize='1M')
      parser.set_defaults(hashfunc='sha256')
      parser.set_defaults(progress=True)
+    parser.set_defaults(check=False)
      args = parser.parse_args()
  
      return args
@@ -199,3 +218,6 @@ if __name__ == "__main__":
      instance = CsvHasher(args)
  
      instance.run()
+
+    if args.check == True:
+        instance.check()
author	Silvio Rhatto <rhatto@riseup.net>
	Fri, 29 Jan 2021 01:10:13 +0000 (22:10 -0300)
committer	Silvio Rhatto <rhatto@riseup.net>
	Fri, 29 Jan 2021 01:10:13 +0000 (22:10 -0300)
Makefile		patch | blob | history
csv-hasher.py		patch | blob | history