gitweb.fluxo.info Git - csv-hasher.git/commitdiff
Fix: coding style
author: Silvio Rhatto <rhatto@riseup.net>
Thu, 28 Jan 2021 19:16:02 +0000 (16:16 -0300)
committer: Silvio Rhatto <rhatto@riseup.net>
Thu, 28 Jan 2021 19:16:02 +0000 (16:16 -0300)
csv-hasher.py

index 090b226862b5f97d5bc1930c5ca959dbabb20a61..e3f71e5027b66bdf8e7ccad49ef4c9771628b85b 100755 (executable)
@@ -45,6 +45,7 @@ class CsvHasher:
             exit (1)
 
     def apply_hash(self, df):
+        """Return column args.colname[0] of df with each value replaced by the args.hashfunc hex digest of its UTF-8 string form."""
 
         return df[self.args.colname[0]].apply(lambda x: \
                 getattr(hashlib, self.args.hashfunc)(str(x).encode('utf-8')).hexdigest())
@@ -57,6 +58,7 @@ class CsvHasher:
         Thanks https://stackoverflow.com/questions/55775674/how-do-i-hash-specific-columns-from-a-csv-file
         Thanks https://stackoverflow.com/questions/11622652/large-persistent-dataframe-in-pandas/12193309#12193309
         """
+
         # Read the CSV
         df = pd.read_csv(self.args.infile[0], sep=self.args.sep, iterator=True, chunksize=self.args.chunksize)
         df = pd.concat(tp, ignore_index=True)
@@ -73,6 +75,7 @@ class CsvHasher:
 
         Thanks https://stackoverflow.com/questions/11622652/large-persistent-dataframe-in-pandas/12193309#12193309
         """
+
         infile = self.args.infile[0]
 
         # Get number of lines in the CSV file