gitweb.fluxo.info Git - csv-hasher.git/commitdiff
Feat: initial version
author Silvio Rhatto <rhatto@riseup.net>
Thu, 28 Jan 2021 18:50:11 +0000 (15:50 -0300)
committer Silvio Rhatto <rhatto@riseup.net>
Thu, 28 Jan 2021 18:50:11 +0000 (15:50 -0300)
12 files changed:
.gitignore
CODE_OF_CONDUCT.md [new file with mode: 0644]
Makefile
Pipfile [new file with mode: 0644]
Pipfile.lock [new file with mode: 0644]
README.md
bin/make-sample [new file with mode: 0755]
bin/provision [new file with mode: 0755]
csv-hasher [new symlink]
csv-hasher.py [new file with mode: 0755]
kvmxfile
tests/.gitgnore [new file with mode: 0644]

index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..d1f825c87e78dd4e190b7ab3cf208428272558cb 100644 (file)
@@ -0,0 +1,2 @@
+tests/output.csv
+tests/sample.csv
diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md
new file mode 100644 (file)
index 0000000..234ed0f
--- /dev/null
@@ -0,0 +1,74 @@
+# Contributor Covenant Code of Conduct
+
+## Our Pledge
+
+In the interest of fostering an open and welcoming environment, we as
+contributors and maintainers pledge to making participation in our project and
+our community a harassment-free experience for everyone, regardless of age, body
+size, disability, ethnicity, gender identity and expression, level of experience,
+nationality, personal appearance, race, religion, or sexual identity and
+orientation.
+
+## Our Standards
+
+Examples of behavior that contributes to creating a positive environment
+include:
+
+* Using welcoming and inclusive language
+* Being respectful of differing viewpoints and experiences
+* Gracefully accepting constructive criticism
+* Focusing on what is best for the community
+* Showing empathy towards other community members
+
+Examples of unacceptable behavior by participants include:
+
+* The use of sexualized language or imagery and unwelcome sexual attention or
+  advances
+* Trolling, insulting/derogatory comments, and personal or political attacks
+* Public or private harassment
+* Publishing others' private information, such as a physical or electronic
+  address, without explicit permission
+* Other conduct which could reasonably be considered inappropriate in a
+  professional setting
+
+## Our Responsibilities
+
+Project maintainers are responsible for clarifying the standards of acceptable
+behavior and are expected to take appropriate and fair corrective action in
+response to any instances of unacceptable behavior.
+
+Project maintainers have the right and responsibility to remove, edit, or
+reject comments, commits, code, wiki edits, issues, and other contributions
+that are not aligned to this Code of Conduct, or to ban temporarily or
+permanently any contributor for other behaviors that they deem inappropriate,
+threatening, offensive, or harmful.
+
+## Scope
+
+This Code of Conduct applies both within project spaces and in public spaces
+when an individual is representing the project or its community. Examples of
+representing a project or community include using an official project e-mail
+address, posting via an official social media account, or acting as an appointed
+representative at an online or offline event. Representation of a project may be
+further defined and clarified by project maintainers.
+
+## Enforcement
+
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported by contacting the project team at [INSERT EMAIL ADDRESS]. All
+complaints will be reviewed and investigated and will result in a response that
+is deemed necessary and appropriate to the circumstances. The project team is
+obligated to maintain confidentiality with regard to the reporter of an incident.
+Further details of specific enforcement policies may be posted separately.
+
+Project maintainers who do not follow or enforce the Code of Conduct in good
+faith may face temporary or permanent repercussions as determined by other
+members of the project's leadership.
+
+## Attribution
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4,
+available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html
+
+[homepage]: https://www.contributor-covenant.org
+
index 3a988d454e82afc08f9eef991c748194233e5f8a..16311a150c91d435a6ad50b4b9ae971267acd63c 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,50 +1,21 @@
 #
-# Global Makefile - https://templater.fluxo.info
+# Makefile for csv-hasher
 #
-# This Makefile contains basic, common targets and also includes
-# any Makefile.* available in the current folder.
-#
-
-# Port to serve content
-HTTP_PORT="8000"
-HTTP_SERVER="http.server"
 
-# Base to serve the content
-HTTP_BASE="."
+# Install the Python dependencies with pipenv
+vendor:
+       pipenv install
 
-# Set CONTAINER based in what we have available in the system
-# This variable can be user in other, included Makefiles to handle virtualization tasks
-ifeq ($(shell which kvmx > /dev/null && test -s kvmxfile && echo yes), yes)
-       CONTAINER = kvmx
-else ifeq ($(shell which vagrant > /dev/null && test -s Vagrantfile && echo yes), yes)
-       CONTAINER = vagrant
-else ifeq ($(shell which docker > /dev/null && test -s Dockerfile && echo yes), yes)
-       CONTAINER = docker
-else
-  CONTAINER = ''
-endif
+# Generate a sample dataset with 200 rows through bin/make-sample
+sample:
+       bin/make-sample 200
 
-# See http://unix.stackexchange.com/questions/32182/simple-command-line-http-server#32200
-#     http://php.net/manual/en/features.commandline.webserver.php
-serve:
-       @if [ "$(HTTP_SERVER)" = "SimpleHTTPServer" ]; then cd $(HTTP_BASE) && python -m SimpleHTTPServer $(HTTP_PORT);              fi
-       @if [ "$(HTTP_SERVER)" = "ssi_server"       ]; then cd $(HTTP_BASE) && PYTHONDONTWRITEBYTECODE=0 ssi_server.py $(HTTP_PORT); fi
-       @if [ "$(HTTP_SERVER)" = "http.server"      ]; then cd $(HTTP_BASE) && python3 -m http.server $(HTTP_PORT);                  fi
-       @if [ "$(HTTP_SERVER)" = "php"              ]; then cd $(HTTP_BASE) && php -S localhost:$(HTTP_PORT);                        fi
+# Hash the "id" column of tests/sample.csv into tests/output.csv,
+# reading the input in chunks of 5 rows
+test-sample:
+       pipenv run ./csv-hasher.py --chunksize 5 tests/sample.csv tests/output.csv id
 
-# Configure a git post-receive hook
-post_receive:
-       git config receive.denyCurrentBranch ignore
-       test -s bin/post-receive && cd .git/hooks && ln -sf ../../bin/post-receive
+# Print the first 20 lines of the sample and of the hashed output
+show-test-output:
+       head -20 tests/sample.csv
+       head -20 tests/output.csv
 
-# Process any other Makefile whose filename matches Makefile.*
-# See https://www.gnu.org/software/make/manual/html_node/Include.html
-#
-# Some of those files might even contain local customizations/overrides
-# that can be .gitignore'd, like a Makefile.local for example.
--include Makefile.*
+# Remove the generated CSV files; -f keeps this from failing when there is
+# nothing to remove (e.g. on a fresh checkout)
+clean-sample:
+       rm -f tests/*.csv
 
-# Customization examples can be as simple as setting variables:
-#CONTAINER  = vagrant
-#CONTAINER  = docker
-#DESTDIR   ?= vendor
+# None of the targets in this Makefile produce a file named after themselves
+.PHONY: vendor sample test-sample show-test-output clean-sample test
+
+# Run the whole test cycle: clean, build a sample, hash it, show the result
+# and clean again. The steps are recipe lines instead of prerequisites
+# because make builds a prerequisite only once per run (so a clean-sample
+# listed twice never ran the final cleanup) and because prerequisite order
+# is not guaranteed under "make -j".
+test:
+       $(MAKE) clean-sample
+       $(MAKE) sample
+       $(MAKE) test-sample
+       $(MAKE) show-test-output
+       $(MAKE) clean-sample
diff --git a/Pipfile b/Pipfile
new file mode 100644 (file)
index 0000000..b143e89
--- /dev/null
+++ b/Pipfile
@@ -0,0 +1,13 @@
+[[source]]
+url = "https://pypi.python.org/simple"
+verify_ssl = true
+name = "pypi"
+
+[packages]
+pandas = "*"
+tqdm = "*"
+
+[dev-packages]
+
+[requires]
+python_version = "3.7"
diff --git a/Pipfile.lock b/Pipfile.lock
new file mode 100644 (file)
index 0000000..194341f
--- /dev/null
@@ -0,0 +1,112 @@
+{
+    "_meta": {
+        "hash": {
+            "sha256": "19ab6829f09294559ac6466b24082f8537cb5c7be2d6aec8bbe7b18814d3d587"
+        },
+        "pipfile-spec": 6,
+        "requires": {
+            "python_version": "3.7"
+        },
+        "sources": [
+            {
+                "name": "pypi",
+                "url": "https://pypi.python.org/simple",
+                "verify_ssl": true
+            }
+        ]
+    },
+    "default": {
+        "numpy": {
+            "hashes": [
+                "sha256:012426a41bc9ab63bb158635aecccc7610e3eff5d31d1eb43bc099debc979d94",
+                "sha256:06fab248a088e439402141ea04f0fffb203723148f6ee791e9c75b3e9e82f080",
+                "sha256:0eef32ca3132a48e43f6a0f5a82cb508f22ce5a3d6f67a8329c81c8e226d3f6e",
+                "sha256:1ded4fce9cfaaf24e7a0ab51b7a87be9038ea1ace7f34b841fe3b6894c721d1c",
+                "sha256:2e55195bc1c6b705bfd8ad6f288b38b11b1af32f3c8289d6c50d47f950c12e76",
+                "sha256:2ea52bd92ab9f768cc64a4c3ef8f4b2580a17af0a5436f6126b08efbd1838371",
+                "sha256:36674959eed6957e61f11c912f71e78857a8d0604171dfd9ce9ad5cbf41c511c",
+                "sha256:384ec0463d1c2671170901994aeb6dce126de0a95ccc3976c43b0038a37329c2",
+                "sha256:39b70c19ec771805081578cc936bbe95336798b7edf4732ed102e7a43ec5c07a",
+                "sha256:400580cbd3cff6ffa6293df2278c75aef2d58d8d93d3c5614cd67981dae68ceb",
+                "sha256:43d4c81d5ffdff6bae58d66a3cd7f54a7acd9a0e7b18d97abb255defc09e3140",
+                "sha256:50a4a0ad0111cc1b71fa32dedd05fa239f7fb5a43a40663269bb5dc7877cfd28",
+                "sha256:603aa0706be710eea8884af807b1b3bc9fb2e49b9f4da439e76000f3b3c6ff0f",
+                "sha256:6149a185cece5ee78d1d196938b2a8f9d09f5a5ebfbba66969302a778d5ddd1d",
+                "sha256:759e4095edc3c1b3ac031f34d9459fa781777a93ccc633a472a5468587a190ff",
+                "sha256:7fb43004bce0ca31d8f13a6eb5e943fa73371381e53f7074ed21a4cb786c32f8",
+                "sha256:811daee36a58dc79cf3d8bdd4a490e4277d0e4b7d103a001a4e73ddb48e7e6aa",
+                "sha256:8b5e972b43c8fc27d56550b4120fe6257fdc15f9301914380b27f74856299fea",
+                "sha256:99abf4f353c3d1a0c7a5f27699482c987cf663b1eac20db59b8c7b061eabd7fc",
+                "sha256:a0d53e51a6cb6f0d9082decb7a4cb6dfb33055308c4c44f53103c073f649af73",
+                "sha256:a12ff4c8ddfee61f90a1633a4c4afd3f7bcb32b11c52026c92a12e1325922d0d",
+                "sha256:a4646724fba402aa7504cd48b4b50e783296b5e10a524c7a6da62e4a8ac9698d",
+                "sha256:a76f502430dd98d7546e1ea2250a7360c065a5fdea52b2dffe8ae7180909b6f4",
+                "sha256:a9d17f2be3b427fbb2bce61e596cf555d6f8a56c222bd2ca148baeeb5e5c783c",
+                "sha256:ab83f24d5c52d60dbc8cd0528759532736b56db58adaa7b5f1f76ad551416a1e",
+                "sha256:aeb9ed923be74e659984e321f609b9ba54a48354bfd168d21a2b072ed1e833ea",
+                "sha256:c843b3f50d1ab7361ca4f0b3639bf691569493a56808a0b0c54a051d260b7dbd",
+                "sha256:cae865b1cae1ec2663d8ea56ef6ff185bad091a5e33ebbadd98de2cfa3fa668f",
+                "sha256:cc6bd4fd593cb261332568485e20a0712883cf631f6f5e8e86a52caa8b2b50ff",
+                "sha256:cf2402002d3d9f91c8b01e66fbb436a4ed01c6498fffed0e4c7566da1d40ee1e",
+                "sha256:d051ec1c64b85ecc69531e1137bb9751c6830772ee5c1c426dbcfe98ef5788d7",
+                "sha256:d6631f2e867676b13026e2846180e2c13c1e11289d67da08d71cacb2cd93d4aa",
+                "sha256:dbd18bcf4889b720ba13a27ec2f2aac1981bd41203b3a3b27ba7a33f88ae4827",
+                "sha256:df609c82f18c5b9f6cb97271f03315ff0dbe481a2a02e56aeb1b1a985ce38e60"
+            ],
+            "version": "==1.19.5"
+        },
+        "pandas": {
+            "hashes": [
+                "sha256:050ed2c9d825ef36738e018454e6d055c63d947c1d52010fbadd7584f09df5db",
+                "sha256:055647e7f4c5e66ba92c2a7dcae6c2c57898b605a3fb007745df61cc4015937f",
+                "sha256:23ac77a3a222d9304cb2a7934bb7b4805ff43d513add7a42d1a22dc7df14edd2",
+                "sha256:2de012a36cc507debd9c3351b4d757f828d5a784a5fc4e6766eafc2b56e4b0f5",
+                "sha256:30e9e8bc8c5c17c03d943e8d6f778313efff59e413b8dbdd8214c2ed9aa165f6",
+                "sha256:324e60bea729cf3b55c1bf9e88fe8b9932c26f8669d13b928e3c96b3a1453dff",
+                "sha256:37443199f451f8badfe0add666e43cdb817c59fa36bceedafd9c543a42f236ca",
+                "sha256:47ec0808a8357ab3890ce0eca39a63f79dcf941e2e7f494470fe1c9ec43f6091",
+                "sha256:496fcc29321e9a804d56d5aa5d7ec1320edfd1898eee2f451aa70171cf1d5a29",
+                "sha256:50e6c0a17ef7f831b5565fd0394dbf9bfd5d615ee4dd4bb60a3d8c9d2e872323",
+                "sha256:5527c5475d955c0bc9689c56865aaa2a7b13c504d6c44f0aadbf57b565af5ebd",
+                "sha256:57d5c7ac62925a8d2ab43ea442b297a56cc8452015e71e24f4aa7e4ed6be3d77",
+                "sha256:9d45f58b03af1fea4b48e44aa38a819a33dccb9821ef9e1d68f529995f8a632f",
+                "sha256:b26e2dabda73d347c7af3e6fed58483161c7b87a886a4e06d76ccfe55a044aa9",
+                "sha256:cfd237865d878da9b65cfee883da5e0067f5e2ff839e459466fb90565a77bda3",
+                "sha256:d7cca42dba13bfee369e2944ae31f6549a55831cba3117e17636955176004088",
+                "sha256:fe7de6fed43e7d086e3d947651ec89e55ddf00102f9dd5758763d56d182f0564"
+            ],
+            "index": "pypi",
+            "version": "==1.2.1"
+        },
+        "python-dateutil": {
+            "hashes": [
+                "sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c",
+                "sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a"
+            ],
+            "version": "==2.8.1"
+        },
+        "pytz": {
+            "hashes": [
+                "sha256:16962c5fb8db4a8f63a26646d8886e9d769b6c511543557bc84e9569fb9a9cb4",
+                "sha256:180befebb1927b16f6b57101720075a984c019ac16b1b7575673bea42c6c3da5"
+            ],
+            "version": "==2020.5"
+        },
+        "six": {
+            "hashes": [
+                "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259",
+                "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"
+            ],
+            "version": "==1.15.0"
+        },
+        "tqdm": {
+            "hashes": [
+                "sha256:4621f6823bab46a9cc33d48105753ccbea671b68bab2c50a9f0be23d4065cb5a",
+                "sha256:fe3d08dd00a526850568d542ff9de9bbc2a09a791da3c334f3213d8d0bbbca65"
+            ],
+            "index": "pypi",
+            "version": "==4.56.0"
+        }
+    },
+    "develop": {}
+}
index 7111eb0df2f2768f4e4ac4151719327e32981353..0e9531a7ea81c25d7144660b577837b50413ef85 100644 (file)
--- a/README.md
+++ b/README.md
@@ -1,2 +1,20 @@
-csv-hasher
-==========
+# csv-hasher
+
+Hashes a given column of a CSV file.
+
+## Requirements
+
+Running:
+
+* [Python 3](https://python.org).
+* [Pandas](https://pandas.pydata.org).
+* [tqdm](https://tqdm.github.io).
+
+Testing:
+
+* [GNU Make](https://www.gnu.org/software/make/).
+* [Pipenv](https://pipenv.pypa.io).
+
+## Testing
+
+    make vendor
+    make test
diff --git a/bin/make-sample b/bin/make-sample
new file mode 100755 (executable)
index 0000000..c282a30
--- /dev/null
@@ -0,0 +1,59 @@
+#!/bin/bash
+#
+# Build a sample dataset.
+#
+# Copyright (C) 2021 Silvio Rhatto - rhatto@riseup.net
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# Parameters
+DIRNAME="$(dirname "$0")"
+TESTS="$DIRNAME/../tests"
+SAMPLE="$TESTS/sample.csv"
+SEPARATOR=","
+COLS="id a b c d e f g h i"
+NCOLS="$(echo $COLS | wc -w)"
+ENTRIES="${1:-20}"
+
+# Ensure the test folder exists; paths are quoted so that a checkout living
+# in a folder with spaces in its name still works
+mkdir -p "$TESTS"
+
+# Everything printed inside the braces goes to the sample file through a
+# single redirection, which also truncates any previous sample
+{
+  # Write sample header: the column names joined by the separator
+  echo "${COLS// /$SEPARATOR}"
+
+  # Write some rows, each one holding a random number per column
+  for ((row = 1; row <= ENTRIES; row++)); do
+    line="$RANDOM"
+
+    # Remaining columns, each one preceded by the separator
+    for ((col = 2; col <= NCOLS; col++)); do
+      line="$line$SEPARATOR$RANDOM"
+    done
+
+    echo "$line"
+  done
+} > "$SAMPLE"
diff --git a/bin/provision b/bin/provision
new file mode 100755 (executable)
index 0000000..89da228
--- /dev/null
@@ -0,0 +1,21 @@
+#!/bin/bash
+#
+# Provision development environment.
+#
+# Copyright (C) 2021 Silvio Rhatto - rhatto@riseup.net
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+# Set up pipenv
+sudo apt install pipenv
diff --git a/csv-hasher b/csv-hasher
new file mode 120000 (symlink)
index 0000000..b0c5fe1
--- /dev/null
@@ -0,0 +1 @@
+csv-hasher.py
\ No newline at end of file
diff --git a/csv-hasher.py b/csv-hasher.py
new file mode 100755 (executable)
index 0000000..71c3593
--- /dev/null
@@ -0,0 +1,164 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Hash a given column from a CSV file.
+#
+# Copyright (C) 2021 Silvio Rhatto - rhatto@riseup.net
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+import os
+import argparse
+import pandas as pd
+import hashlib
+import subprocess
+from sys  import exit
+from tqdm import tqdm
+
+class CsvHasher:
+    """
+    Hashes a column from a CSV file.
+
+    The constructor receives an argparse-style namespace and reads these
+    attributes from it: infile, outfile and colname (one-element lists),
+    sep, chunksize, hashfunc and progress.
+    """
+
+    def __init__(self, args):
+        """
+        Save the arguments and validate them.
+
+        Prints a message and exits with status 1 when the input file does not
+        exist or when the hash function is not supported.
+        """
+        # Save arguments
+        self.args = args
+
+        # Check if source file exists
+        if not os.path.exists(args.infile[0]):
+            print('File not found: ' + args.infile[0])
+            exit (1)
+
+        # Accept only hashlib constructors whose hexdigest() takes no argument.
+        # A plain hasattr() check would also let through names such as "new"
+        # or "pbkdf2_hmac" (and the variable-length "shake_*" digests), which
+        # would only fail later, in the middle of a run.
+        if self.args.hashfunc not in hashlib.algorithms_guaranteed or \
+                self.args.hashfunc.startswith('shake_'):
+            print('Invalid hash function ' + self.args.hashfunc)
+            exit (1)
+
+    def apply_hash(self, df):
+        """
+        Return a Series with the hex digest of each value of the chosen column.
+
+        Every value is converted with str() and encoded as UTF-8 before being
+        hashed. The data frame itself is not modified.
+        """
+        # Look the constructor up once instead of once per row
+        hashfunc = getattr(hashlib, self.args.hashfunc)
+
+        return df[self.args.colname[0]].apply(
+                lambda x: hashfunc(str(x).encode('utf-8')).hexdigest())
+
+    def run_legacy(self):
+        """
+        Process CSV in "legacy" mode: open the input file, process and write the output in a single step.
+        This won't work with CSVs larger than the available memory in the system.
+
+        Thanks https://stackoverflow.com/questions/55775674/how-do-i-hash-specific-columns-from-a-csv-file
+        Thanks https://stackoverflow.com/questions/11622652/large-persistent-dataframe-in-pandas/12193309#12193309
+        """
+        # Read the CSV in chunks and join them into a single data frame
+        tp = pd.read_csv(self.args.infile[0], sep=self.args.sep, iterator=True, chunksize=self.args.chunksize)
+        df = pd.concat(tp, ignore_index=True)
+
+        # Hashing the column
+        df[self.args.colname[0]] = self.apply_hash(df)
+
+        # Writing the new CSV output
+        df.to_csv(self.args.outfile[0], index=False)
+
+    def run(self):
+        """
+        Improved CSV processor for large files: the input is read, hashed and
+        written one chunk at a time, so only a single chunk is held in memory.
+
+        Any existing output file is overwritten. Prints a message and exits
+        with status 1 when the input has fewer than two lines or does not have
+        the requested column.
+
+        Thanks https://stackoverflow.com/questions/11622652/large-persistent-dataframe-in-pandas/12193309#12193309
+        """
+        infile  = self.args.infile[0]
+        outfile = self.args.outfile[0]
+        colname = self.args.colname[0]
+
+        # Get number of lines in the CSV file. Counting in Python avoids
+        # passing the file name through a shell.
+        with open(infile, 'rb') as handle:
+            nlines = sum(1 for line in handle)
+
+        if nlines < 2:
+            print('CSV file is too small.')
+            exit (1)
+
+        # Read only the header line, just to get the column names
+        columns = pd.read_csv(infile, sep=self.args.sep, nrows=0).columns
+
+        if colname not in columns:
+            print('Column not found: ' + colname)
+            exit (1)
+
+        # Initialize progress bar; the header line is not a data row
+        progress_bar = tqdm(total=nlines - 1) if self.args.progress else None
+
+        # The chunked reader consumes the header once and then yields the data
+        # rows in order, so no line is read twice and the header is never
+        # mistaken for data
+        reader      = pd.read_csv(infile, sep=self.args.sep, chunksize=self.args.chunksize)
+        first_chunk = True
+
+        try:
+            for df in reader:
+                # Hashing the column
+                df[colname] = self.apply_hash(df)
+
+                # Writing the new CSV output: the first chunk replaces any
+                # previous file and carries the header, the others are appended
+                df.to_csv(outfile, index=False,
+                        mode='w' if first_chunk else 'a',
+                        header=first_chunk)
+
+                first_chunk = False
+
+                # Advance by the rows actually read (the last chunk may be short)
+                if progress_bar is not None:
+                    progress_bar.update(len(df))
+        finally:
+            # Teardown
+            reader.close()
+
+            if progress_bar is not None:
+                progress_bar.close()
+
+def cmdline():
+    """
+    Parse the command line.
+
+    Positional arguments (each stored as a one-element list): infile, outfile
+    and colname. Options: --sep, --chunksize, --hashfunc and the
+    --progress / --no-progress pair.
+
+    :return: Parsed command line arguments, with defaults applied.
+    """
+
+    basename = os.path.basename(__file__)
+
+    # Parse CLI
+    #examples  = "Examples:\n\t" + basename + " --no-progress \n"
+
+    parser = argparse.ArgumentParser(
+        description='Hashes a column from a CSV file.',
+        epilog='',
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+    )
+
+    # Mandatory positional arguments
+    positionals = (
+        ('infile',  'CSV input file name'),
+        ('outfile', 'CSV output file name'),
+        ('colname', 'Column name'),
+    )
+
+    for name, description in positionals:
+        parser.add_argument(name, nargs=1, help=description)
+
+    # Optional settings
+    parser.add_argument('--sep', dest='sep', help='Separator, defaults to ","')
+    parser.add_argument('--chunksize', dest='chunksize', type=int,
+                        help='Read chunks at a time, defaults to 1000')
+    parser.add_argument('--hashfunc', dest='hashfunc', help='Hash function, defaults do sha256')
+
+    # Both progress switches write to the same destination
+    switches = (
+        ('--progress',    'store_true',  'Enable progress bar.'),
+        ('--no-progress', 'store_false', 'Disable progress bar.'),
+    )
+
+    for flag, action, description in switches:
+        parser.add_argument(flag, dest='progress', action=action, help=description)
+
+    # Add default values and get args
+    parser.set_defaults(sep=',', chunksize=1000, hashfunc='sha256', progress=True)
+
+    return parser.parse_args()
+
+if __name__ == "__main__":
+    # Script entry point: parse the command line, build the hasher with the
+    # resulting arguments and process the file
+    CsvHasher(cmdline()).run()
index 4a6f7f39863d1241bb32ba74e5e665b322780720..7a342f5c02f3bf7c58707e1d35f8fea949869f0a 100644 (file)
--- a/kvmxfile
+++ b/kvmxfile
@@ -7,6 +7,7 @@ hostname="csv-hasher"
 
 # Which base box you should use. Leave unconfigured to use kvmx-create instead.
 #basebox="buster"
+basebox="dev"
 
 # First user name
 user="user"
@@ -31,10 +32,10 @@ net="user"
 # Set this if you want to be able to share a single folder between host and guest.
 # Needs ssh_support set to "y" and a workable SSH connection to the guest.
 shared_folder="."
-shared_folder_mountpoint="/home/$user/code/$VM"
+#shared_folder_mountpoint="/home/$user/code/$VM"
 #shared_folder="$HOME/temp/shared/$VM"
 #shared_folder_mountpoint="/home/$user/temp/shared/$VM"
-#shared_folder_mountpoint="/srv/shared"
+shared_folder_mountpoint="/srv/shared"
 #shared_folder_mountpoint="/srv/kvmx"
 #shared_folder_mountpoint="/vagrant"
 
@@ -74,6 +75,7 @@ shared_folder_mountpoint="/home/$user/code/$VM"
 #provision_command="/usr/local/share/kvmx/provision/debian/development"
 #provision_command="/usr/local/share/kvmx/provision/debian/trashman"
 #provision_command="/usr/local/share/kvmx/provision/debian/desktop-basic"
+provision_command="/usr/local/share/kvmx/provision/debian/development && /srv/shared/bin/provision"
 
 # Startup command
 #startup_command="/path/to/custom/command"
@@ -102,7 +104,7 @@ vnc_client="virt-viewer"
 spice="1"
 
 # Set this if you want to attach a SPICE client when the machine boots.
-run_spice_client="1"
+run_spice_client="0"
 
 # SPICE client
 #spice_client="spicec"
diff --git a/tests/.gitgnore b/tests/.gitgnore
new file mode 100644 (file)
index 0000000..e69de29