Feat: change default and test params; use return status

author Silvio Rhatto <rhatto@riseup.net>

Fri, 29 Jan 2021 21:50:12 +0000 (18:50 -0300)

committer Silvio Rhatto <rhatto@riseup.net>

Fri, 29 Jan 2021 21:50:12 +0000 (18:50 -0300)
author Silvio Rhatto <rhatto@riseup.net>
Fri, 29 Jan 2021 21:50:12 +0000 (18:50 -0300)
committer Silvio Rhatto <rhatto@riseup.net>
Fri, 29 Jan 2021 21:50:12 +0000 (18:50 -0300)
diff --git a/Makefile b/Makefile

index 8732717eff141eee10a7f7895e5e9025aff91bd1..df79e73e884425c843c05ff9bf63d76edfa60ab7 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -2,10 +2,10 @@
  # Makefile for csv-hasher
  #
  
-CHUNKSIZE                                                      = 10000
-CHECK_LINES                                                    = 20
-SAMPLE_ITERATIONS         = 1000
-SAMPLE_ROWS_PER_ITERATION = 1000
+CHUNKSIZE                                                      = 64K
+CHECK_LINES                                                    = 16
+SAMPLE_ITERATIONS         = 1024
+SAMPLE_ROWS_PER_ITERATION = 1024
  TESTS                                                          = tests
  COLNAME                                                        = id
  SAMPLE                                                         = $(TESTS)/sample.csv
diff --git a/bin/provision b/bin/provision

index 89da22832e8a002468e68a79736cad7c5f055ffe..df1ef5a09322658294be596d960b94212a016340 100755 (executable)
--- a/bin/provision
+++ b/bin/provision
@@ -18,4 +18,4 @@
  # along with this program.  If not, see <http://www.gnu.org/licenses/>.
  
  # Setup pipenv
-sudo apt install pipenv
+sudo apt install -y pipenv
diff --git a/csv-hasher.py b/csv-hasher.py

index c07adb0b7214e3a9eb70c89fdcb92b1be949bd3d..e76c7b0a18d19895b12f9777107c5a7e1091f29f 100755 (executable)
--- a/csv-hasher.py
+++ b/csv-hasher.py
@@ -90,7 +90,7 @@ class CsvHasher:
          # Check the input file
          if nlines < 2:
              print('CSV file is too small.')
-            exit (1)
+            return False
  
          # Holds columns definition
          columns = None
@@ -104,7 +104,7 @@ class CsvHasher:
          # Check for the column
          if self.args.colname[0] not in columns:
              print('Column not found: ' + self.args.colname[0])
-            exit (1)
+            return False
  
          # Start with an empty file
          try:
@@ -112,7 +112,7 @@ class CsvHasher:
                  f.truncate(0)
          except IOError:
              print('Error writing to ' + outfile)
-            exit(1)
+            return False
  
          # Initialize progress bar
          progress_bar = tqdm(total=nlines) if self.args.progress else False
@@ -136,7 +136,7 @@ class CsvHasher:
                  df[self.args.colname[0]] = self.apply_hash(df)
              except KeyError as e:
                  print('Column not found: ' + self.args.colname[0])
-                exit (1)
+                return False
  
              # Writing the new CSV output
              df.to_csv(outfile, index=False, mode='a', header=write_header)
@@ -173,7 +173,11 @@ def cmdline():
      :return: Command line arguments.
      """
  
-    basename = os.path.basename(__file__)
+    # Defaults
+    basename  = os.path.basename(__file__)
+    chunksize = '1M'
+    hashfunc  = 'sha256'
+    progress  = True
  
      # Parse CLI
      #examples  = "Examples:\n\t" + basename + " --no-progress \n"
@@ -190,23 +194,23 @@ def cmdline():
      parser.add_argument('--sep', dest='sep', help='Separator, defaults to ","')
  
      parser.add_argument('--chunksize', dest='chunksize',
-            help='Read chunks at a time, defaults to 1M, supports human-readable notation')
+            help='Read chunks at a time, supports human-readable notation, defaults to ' + chunksize)
  
-    parser.add_argument('--hashfunc', dest='hashfunc', help='Hash function, defaults do sha256')
+    parser.add_argument('--hashfunc', dest='hashfunc', help='Hash function, defaults do ' + hashfunc)
  
      parser.add_argument('--progress', dest='progress', action='store_true',
-                        help='Enable progress bar.')
+                        help='Enable progress bar, defaults to ' + str(progress))
  
      parser.add_argument('--no-progress', dest='progress', action='store_false',
                          help='Disable progress bar.')
  
      parser.add_argument('--check', dest='check', action='store_true',
-                        help='Check both files for differences (test suite), defaults to false.')
+                        help='Check both files for differences (test suite), defaults to ' + str(not progress))
  
      # Add default values and get args
      parser.set_defaults(sep=',')
-    parser.set_defaults(chunksize='1M')
-    parser.set_defaults(hashfunc='sha256')
+    parser.set_defaults(chunksize=chunksize)
+    parser.set_defaults(hashfunc=hashfunc)
      parser.set_defaults(progress=True)
      parser.set_defaults(check=False)
      args = parser.parse_args()
@@ -216,8 +220,10 @@ def cmdline():
  if __name__ == "__main__":
      args     = cmdline()
      instance = CsvHasher(args)
+    status   = instance.run()
  
-    instance.run()
+    if status is False:
+        exit(1)
  
      if args.check == True:
          instance.check()
diff --git a/csv-sampler.py b/csv-sampler.py

index 35d82dbbe2612c5e807ee9268afd1fb592744591..fa861a8ba3dbfe6e4e4b860b56a6cf2d2e921739 100755 (executable)
--- a/csv-sampler.py
+++ b/csv-sampler.py
@@ -66,7 +66,11 @@ def cmdline():
      :return: Command line arguments.
      """
  
-    basename = os.path.basename(__file__)
+    # Defaults
+    basename           = os.path.basename(__file__)
+    rows_per_iteration = 1024
+    iterations         = 1024
+    progress           = True
  
      # Parse CLI
      #examples  = "Examples:\n\t" + basename + " --no-progress \n"
@@ -76,23 +80,23 @@ def cmdline():
                                       epilog=epilog,
                                       formatter_class=argparse.RawDescriptionHelpFormatter,)
  
-    parser.add_argument('outfile',  nargs=1, help='CSV output file name')
+    parser.add_argument('outfile', nargs=1, help='CSV output file name')
  
      parser.add_argument('--rows_per_iteration', dest='rows_per_iteration',
-            type=int, help='Rows per iteration, defaults to 1000')
+            type=int, help='Rows per iteration, defaults to ' + str(rows_per_iteration))
  
      parser.add_argument('--iterations', dest='iterations',
-            help='Number of iterations, defaults to 1000')
+            help='Number of iterations, defaults to ' + str(iterations))
  
      parser.add_argument('--progress', dest='progress', action='store_true',
-                        help='Enable progress bar.')
+                        help='Enable progress bar, defaults to ' + str(progress))
  
      parser.add_argument('--no-progress', dest='progress', action='store_false',
                          help='Disable progress bar.')
  
      # Add default values and get args
-    parser.set_defaults(rows_per_iteration=1000)
-    parser.set_defaults(iterations=1000)
+    parser.set_defaults(rows_per_iteration=rows_per_iteration)
+    parser.set_defaults(iterations=iterations)
      parser.set_defaults(progress=True)
      args = parser.parse_args()
author	Silvio Rhatto <rhatto@riseup.net>
	Fri, 29 Jan 2021 21:50:12 +0000 (18:50 -0300)
committer	Silvio Rhatto <rhatto@riseup.net>
	Fri, 29 Jan 2021 21:50:12 +0000 (18:50 -0300)
Makefile		patch \| blob \| history
bin/provision		patch \| blob \| history
csv-hasher.py		patch \| blob \| history
csv-sampler.py		patch \| blob \| history