From: Silvio Rhatto Date: Thu, 28 Jan 2021 19:16:02 +0000 (-0300) Subject: Fix: coding style X-Git-Url: https://gitweb.fluxo.info/?a=commitdiff_plain;h=fc5d9ba9e251db721a525974e6a8a7acde58814a;p=csv-hasher.git Fix: coding style --- diff --git a/csv-hasher.py b/csv-hasher.py index 090b226..e3f71e5 100755 --- a/csv-hasher.py +++ b/csv-hasher.py @@ -45,6 +45,7 @@ class CsvHasher: exit (1) def apply_hash(self, df): + """Apply the hash function into a column from a dataframe""" return df[self.args.colname[0]].apply(lambda x: \ getattr(hashlib, self.args.hashfunc)(str(x).encode('utf-8')).hexdigest()) @@ -57,6 +58,7 @@ class CsvHasher: Thanks https://stackoverflow.com/questions/55775674/how-do-i-hash-specific-columns-from-a-csv-file Thanks https://stackoverflow.com/questions/11622652/large-persistent-dataframe-in-pandas/12193309#12193309 """ + # Read the CSV df = pd.read_csv(self.args.infile[0], sep=self.args.sep, iterator=True, chunksize=self.args.chunksize) df = pd.concat(tp, ignore_index=True) @@ -73,6 +75,7 @@ class CsvHasher: Thanks https://stackoverflow.com/questions/11622652/large-persistent-dataframe-in-pandas/12193309#12193309 """ + infile = self.args.infile[0] # Get number of lines in the CSV file